diff --git "a/profile_trace/iteration_14848/rank7_trace.json" "b/profile_trace/iteration_14848/rank7_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_14848/rank7_trace.json" @@ -0,0 +1,68774 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 7, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "BBF6D1CA5C0C481C88960A6576FC8ED6", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367620701.540, "dur": 104.133, + "args": { + "External id": 293377,"Record function id": 0, "Sequence number": 1209231, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367620717.512, "dur": 80.909, + "args": { + "External id": 293378,"Sequence number": 1209231, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2070554, "tid": 2107619, "ts": 5333367620717.512, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070554, "tid": 2107619, + "ts": 5333367620724.784, "dur": 71.706, + "args": { + "External id": 293379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367620818.188, "dur": 139.744, + "args": { + "External id": 293380,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367620867.291, "dur": 79.283, + "args": { + "External id": 293381,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.31", "pid": 2070554, "tid": 2107619, + "ts": 5333367620893.565, "dur": 42.410, + "args": { + "External id": 293382,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367620951.505, "dur": 1.822, + "args": { + "External id": 293383,"Sequence number": 1209230, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2070554, "tid": 2107619, "ts": 5333367620951.505, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearListNetFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367620964.534, "dur": 77394.539, + "args": { + "External id": 293384,"Record function id": 0, "Sequence number": 1209229, "Fwd thread id": 1, "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367620969.010, "dur": 77339.379, + "args": { + "External id": 293385,"Sequence number": 1209229, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8 + } + }, + { + "ph": "f", "id": 3, "pid": 2070554, "tid": 2107619, "ts": 5333367620969.010, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367620999.648, "dur": 4.020, + "args": { + "External id": 293386,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367621006.655, "dur": 77178.169, + "args": { + "External id": 293387,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367621008.633, "dur": 77175.703, + "args": { + "External id": 293388,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367621011.571, "dur": 7.844, + "args": { + "External id": 293389,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367621022.908, "dur": 77159.193, + "args": { + "External id": 293390,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070554, "tid": 2107619, + "ts": 5333367698191.013, "dur": 0.451, + "args": { + "External id": 293391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070554, "tid": 2107619, + "ts": 5333367698193.823, "dur": 3.168, + "args": { + "External id": 293392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070554, "tid": 2107619, + "ts": 5333367698195.832, "dur": 1.014, + "args": { + "External id": 293393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070554, "tid": 2107619, + "ts": 5333367698204.177, "dur": 38.725, + "args": { + "External id": 293394,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070554, "tid": 2107619, + "ts": 5333367698253.819, "dur": 48.126, + "args": { + "External id": 293395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070554, "tid": 2107619, + "ts": 5333367698255.371, "dur": 46.341, + "args": { + "External id": 293396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070554, "tid": 2107619, + "ts": 5333367698256.945, "dur": 44.457, + "args": { + "External id": 293397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367698314.882, "dur": 35.921, + "args": { + "External id": 293398,"Record function id": 0, "Concrete Inputs": ["", "15", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], []], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367698316.470, "dur": 34.096, + "args": { + "External id": 293399,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367698323.492, "dur": 5.203, + "args": { + "External id": 293400,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367698329.812, "dur": 20.271, + "args": { + "External id": 293401,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367698373.544, "dur": 11.500, + "args": { + "External id": 293402,"Record function id": 0, "Ev Idx": 25 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367698376.358, "dur": 7.216, + "args": { + "External id": 293403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367698379.174, "dur": 4.030, + "args": { + "External id": 293404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367698379.887, "dur": 3.186, + "args": { + "External id": 293405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367698388.742, "dur": 16.022, + "args": { + "External id": 293406,"Record function id": 0, "Sequence number": 1209228, "Fwd thread id": 1, "Ev Idx": 29 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367698389.993, "dur": 12.881, + "args": { + "External id": 293407,"Sequence number": 1209228, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 30 + } + }, + { + "ph": "f", "id": 4, "pid": 2070554, "tid": 2107619, "ts": 5333367698389.993, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070554, "tid": 2107619, + "ts": 5333367698395.464, "dur": 7.129, + "args": { + "External id": 293408,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367698397.882, "dur": 4.520, + "args": { + "External id": 293409,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367698410.024, "dur": 180.157, + "args": { + "External id": 293410,"Record function id": 0, "Sequence number": 1209227, "Fwd thread id": 1, "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367698418.284, "dur": 165.712, + "args": { + "External id": 293411,"Sequence number": 1209227, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 34 + } + }, + { + "ph": "f", "id": 5, "pid": 2070554, "tid": 2107619, "ts": 5333367698418.284, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367698456.524, "dur": 2.298, + "args": { + "External id": 293412,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367698461.038, "dur": 48.868, + "args": { + "External id": 293413,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367698461.813, "dur": 47.726, + "args": { + "External id": 293414,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367698463.021, "dur": 6.796, + "args": { + "External id": 293415,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367698470.643, "dur": 38.489, + "args": { + "External id": 293416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 39 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070554, "tid": 2107619, + "ts": 5333367698511.453, "dur": 0.161, + "args": { + "External id": 293417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070554, "tid": 2107619, + "ts": 5333367698512.732, "dur": 2.991, + "args": { + "External id": 293418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070554, "tid": 2107619, + "ts": 5333367698515.062, "dur": 0.362, + "args": { + "External id": 293419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070554, "tid": 2107619, + "ts": 5333367698518.157, "dur": 18.271, + "args": { + "External id": 293420,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070554, "tid": 2107619, + "ts": 5333367698540.167, "dur": 36.965, + "args": { + "External id": 293421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070554, "tid": 2107619, + "ts": 5333367698540.820, "dur": 36.113, + "args": { + "External id": 293422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070554, "tid": 2107619, + "ts": 5333367698542.721, "dur": 33.897, + "args": { + "External id": 293423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367698595.782, "dur": 6.402, + "args": { + "External id": 293424,"Record function id": 0, "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367698597.744, "dur": 3.758, + "args": { + "External id": 293425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367698598.943, "dur": 1.883, + "args": { + "External id": 293426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 49 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367698599.618, "dur": 1.094, + "args": { + "External id": 293427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367698606.886, "dur": 76.684, + "args": { + "External id": 293428,"Record function id": 0, "Sequence number": 1209226, "Fwd thread id": 1, "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070554, "tid": 2107619, + "ts": 5333367698608.098, "dur": 5.962, + "args": { + "External id": 293429,"Sequence number": 1209226, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 52 + } + }, + { + "ph": "f", "id": 6, "pid": 2070554, "tid": 2107619, "ts": 5333367698608.098, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070554, "tid": 2107619, + "ts": 5333367698609.772, "dur": 4.104, + "args": { + "External id": 293430,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367698612.308, "dur": 1.451, + "args": { + "External id": 293431,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070554, "tid": 2107619, + "ts": 5333367698617.732, "dur": 58.188, + "args": { + "External id": 293432,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367698690.213, "dur": 328.011, + "args": { + "External id": 293433,"Record function id": 0, "Sequence number": 1209225, "Fwd thread id": 1, "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367698691.856, "dur": 318.450, + "args": { + "External id": 293434,"Sequence number": 1209225, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 57 + } + }, + { + "ph": "f", "id": 7, "pid": 2070554, "tid": 2107619, "ts": 5333367698691.856, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367698839.788, "dur": 44.530, + "args": { + "External id": 293435,"kernel_hash": "ceuhgo46kaqbnwlditj3h4z52obosunxjs35ou76xc47fiiswkar", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuhgo46kaqbnwlditj3h4z52obosunxjs35ou76xc47fiiswkar.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367698921.715, "dur": 29.700, + "args": { + "External id": 293436,"kernel_hash": "c4akubwlpa24tvlbvx7jydmsxo5wvd2yhzmkuenzmf65zqjkqtyl", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/4a/c4akubwlpa24tvlbvx7jydmsxo5wvd2yhzmkuenzmf65zqjkqtyl.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 59 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367698967.859, "dur": 22.215, + "args": { + "External id": 293437,"kernel_hash": "cdbhfghffo4a54xshvxcoylttdt6djirisy6yibfmduah5ybzscb", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/db/cdbhfghffo4a54xshvxcoylttdt6djirisy6yibfmduah5ybzscb.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367699025.412, "dur": 8.777, + "args": { + "External id": 293438,"Record function id": 0, "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367699027.131, "dur": 6.237, + "args": { + "External id": 293439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367699029.548, "dur": 3.174, + "args": { + "External id": 293440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367699030.501, "dur": 2.103, + "args": { + "External id": 293441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367699038.234, "dur": 2660.245, + "args": { + "External id": 293442,"Record function id": 0, "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.31)", "pid": 2070554, "tid": 2107619, + "ts": 5333367699061.396, "dur": 877.244, + "args": { + "External id": 293443,"Record function id": 0, "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.30", "pid": 2070554, "tid": 2107619, + "ts": 5333367699083.093, "dur": 847.705, + "args": { + "External id": 293444,"Record function id": 0, "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070554, "tid": 2107619, + "ts": 5333367699096.973, "dur": 819.008, + "args": { + "External id": 293445,"Record function id": 0, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367699192.085, "dur": 7.103, + "args": { + "External id": 293446,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 69 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367699214.895, "dur": 32.310, + "args": { + "External id": 293447,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699220.471, "dur": 3.515, + "args": { + "External id": 293448,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699225.094, "dur": 0.665, + "args": { + "External id": 293449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699226.655, "dur": 2.124, + "args": { + "External id": 293450,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699231.805, "dur": 0.532, + "args": { + "External id": 293451,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 74 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699233.044, "dur": 0.317, + "args": { + "External id": 293452,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699233.811, "dur": 0.296, + "args": { + "External id": 293453,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699237.318, "dur": 0.320, + "args": { + "External id": 293454,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699238.227, "dur": 0.269, + "args": { + "External id": 293455,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699240.228, "dur": 2.083, + "args": { + "External id": 293456,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367699256.944, "dur": 34.266, + "args": { + "External id": 293457,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367699326.461, "dur": 104.757, + "args": { + "External id": 293458,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367699337.556, "dur": 4.078, + "args": { + "External id": 293459,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367699346.764, "dur": 12.170, + "args": { + "External id": 293460,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367699351.076, "dur": 7.438, + "args": { + "External id": 293461,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699355.271, "dur": 2.068, + "args": { + "External id": 293462,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367699365.366, "dur": 24.221, + "args": { + "External id": 293463,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699367.228, "dur": 0.494, + "args": { + "External id": 293464,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699369.672, "dur": 0.546, + "args": { + "External id": 293465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699370.955, "dur": 2.212, + "args": { + "External id": 293466,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 89 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699373.665, "dur": 0.257, + "args": { + "External id": 293467,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699375.074, "dur": 0.488, + "args": { + "External id": 293468,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699378.492, "dur": 0.266, + "args": { + "External id": 293469,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699380.486, "dur": 0.394, + "args": { + "External id": 293470,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 93 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699381.361, "dur": 1.894, + "args": { + "External id": 293471,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367699385.368, "dur": 0.365, + "args": { + "External id": 293472,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367699400.617, "dur": 22.906, + "args": { + "External id": 293473,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367699486.933, "dur": 328.354, + "args": { + "External id": 293474,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367699516.694, "dur": 293.473, + "args": { + "External id": 293475,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 98, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367699527.007, "dur": 276.595, + "args": { + "External id": 293476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367699839.479, "dur": 2.592, + "args": { + "External id": 293477,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 100, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367699946.783, "dur": 1727.886, + "args": { + "External id": 293478,"Sequence number": 1209224, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 101 + } + }, + { + "ph": "f", "id": 8, "pid": 2070554, "tid": 2107619, "ts": 5333367699946.783, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700061.977, "dur": 145.690, + "args": { + "External id": 293479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367700257.546, "dur": 42.613, + "args": { + "External id": 293480,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700320.617, "dur": 58.301, + "args": { + "External id": 293481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700388.258, "dur": 36.055, + "args": { + "External id": 293482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700430.402, "dur": 47.687, + "args": { + "External id": 293483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700484.776, "dur": 28.059, + "args": { + "External id": 293484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700521.969, "dur": 44.039, + "args": { + "External id": 293485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367700591.299, "dur": 24.121, + "args": { + "External id": 293486,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367700677.953, "dur": 31.901, + "args": { + "External id": 293487,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367700734.906, "dur": 20.552, + "args": { + "External id": 293488,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367700772.300, "dur": 16.818, + "args": { + "External id": 293489,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700799.540, "dur": 36.534, + "args": { + "External id": 293490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367700839.310, "dur": 33.583, + "args": { + "External id": 293491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367700902.364, "dur": 177.203, + "args": { + "External id": 293492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367700985.804, "dur": 5.719, + "args": { + "External id": 293493,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367700993.284, "dur": 1.938, + "args": { + "External id": 293494,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367701110.934, "dur": 26.231, + "args": { + "External id": 293495,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367701150.890, "dur": 33.357, + "args": { + "External id": 293496,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367701197.391, "dur": 42.771, + "args": { + "External id": 293497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367701249.171, "dur": 36.052, + "args": { + "External id": 293498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367701292.011, "dur": 22.656, + "args": { + "External id": 293499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367701318.951, "dur": 29.292, + "args": { + "External id": 293500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367701357.065, "dur": 21.994, + "args": { + "External id": 293501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367701384.818, "dur": 29.234, + "args": { + "External id": 293502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367701437.043, "dur": 24.821, + "args": { + "External id": 293503,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367701479.912, "dur": 27.996, + "args": { + "External id": 293504,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367701522.780, "dur": 19.533, + "args": { + "External id": 293505,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367701559.437, "dur": 14.364, + "args": { + "External id": 293506,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367701588.455, "dur": 19.252, + "args": { + "External id": 293507,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701720.875, "dur": 14.196, + "args": { + "External id": 293508,"Record function id": 0, "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701723.769, "dur": 10.396, + "args": { + "External id": 293509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701727.681, "dur": 5.375, + "args": { + "External id": 293510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701729.256, "dur": 3.671, + "args": { + "External id": 293511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701738.958, "dur": 4.805, + "args": { + "External id": 293512,"Record function id": 0, "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701740.363, "dur": 2.950, + "args": { + "External id": 293513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701741.180, "dur": 1.688, + "args": { + "External id": 293514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701741.864, "dur": 0.915, + "args": { + "External id": 293515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701747.141, "dur": 4.091, + "args": { + "External id": 293516,"Record function id": 0, "Ev Idx": 139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701748.225, "dur": 2.514, + "args": { + "External id": 293517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701748.861, "dur": 1.457, + "args": { + "External id": 293518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701749.566, "dur": 0.646, + "args": { + "External id": 293519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701754.466, "dur": 3.886, + "args": { + "External id": 293520,"Record function id": 0, "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701755.527, "dur": 2.395, + "args": { + "External id": 293521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701756.074, "dur": 1.404, + "args": { + "External id": 293522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701756.653, "dur": 0.732, + "args": { + "External id": 293523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701761.472, "dur": 4.621, + "args": { + "External id": 293524,"Record function id": 0, "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701763.635, "dur": 2.035, + "args": { + "External id": 293525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701764.202, "dur": 0.963, + "args": { + "External id": 293526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701764.497, "dur": 0.601, + "args": { + "External id": 293527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701769.153, "dur": 3.436, + "args": { + "External id": 293528,"Record function id": 0, "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701770.178, "dur": 1.992, + "args": { + "External id": 293529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701770.658, "dur": 1.086, + "args": { + "External id": 293530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701771.018, "dur": 0.662, + "args": { + "External id": 293531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701775.991, "dur": 7.622, + "args": { + "External id": 293532,"Record function id": 0, "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701776.899, "dur": 6.267, + "args": { + "External id": 293533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701777.366, "dur": 5.360, + "args": { + "External id": 293534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701779.554, "dur": 3.078, + "args": { + "External id": 293535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701786.726, "dur": 3.687, + "args": { + "External id": 293536,"Record function id": 0, "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701787.713, "dur": 2.289, + "args": { + "External id": 293537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701788.379, "dur": 1.182, + "args": { + "External id": 293538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701788.673, "dur": 0.812, + "args": { + "External id": 293539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701793.466, "dur": 3.191, + "args": { + "External id": 293540,"Record function id": 0, "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367701794.369, "dur": 1.838, + "args": { + "External id": 293541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701794.836, "dur": 0.920, + "args": { + "External id": 293542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367701795.116, "dur": 0.566, + "args": { + "External id": 293543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367701801.559, "dur": 37808.705, + "args": { + "External id": 293544,"Record function id": 0, "Sequence number": 1209223, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367701802.750, "dur": 37799.058, + "args": { + "External id": 293545,"Sequence number": 1209223, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 9, "pid": 2070554, "tid": 2107619, "ts": 5333367701802.750, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.31)", "pid": 2070554, "tid": 2107619, + "ts": 5333367701833.517, "dur": 41.904, + "args": { + "External id": 293546,"Record function id": 0, "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.31)", "pid": 2070554, "tid": 2107619, + "ts": 5333367701883.733, "dur": 68.130, + "args": { + "External id": 293547,"Record function id": 0, "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.31)", "pid": 2070554, "tid": 2107619, + "ts": 5333367701960.975, "dur": 37629.778, + "args": { + "External id": 293548,"Record function id": 0, "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367702020.114, "dur": 7.545, + "args": { + "External id": 293549,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367702036.994, "dur": 4.632, + "args": { + "External id": 293550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367702061.045, "dur": 36622.008, + "args": { + "External id": 293551,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367702074.522, "dur": 36599.529, + "args": { + "External id": 293552,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367702118.808, "dur": 4.055, + "args": { + "External id": 293553,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367702132.401, "dur": 36480.925, + "args": { + "External id": 293554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367702136.614, "dur": 36476.102, + "args": { + "External id": 293555,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367702139.478, "dur": 4.641, + "args": { + "External id": 293556,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367702145.677, "dur": 36463.400, + "args": { + "External id": 293557,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367738777.427, "dur": 8.314, + "args": { + "External id": 293558,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367738779.889, "dur": 5.559, + "args": { + "External id": 293559,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367738823.516, "dur": 444.733, + "args": { + "External id": 293560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367738857.884, "dur": 405.215, + "args": { + "External id": 293561,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 184, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367738870.687, "dur": 385.497, + "args": { + "External id": 293562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367739292.072, "dur": 2.329, + "args": { + "External id": 293563,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 186, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739357.156, "dur": 8.397, + "args": { + "External id": 293564,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739416.243, "dur": 1.514, + "args": { + "External id": 293565,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739433.354, "dur": 1.107, + "args": { + "External id": 293566,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739450.577, "dur": 0.891, + "args": { + "External id": 293567,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739467.551, "dur": 2.799, + "args": { + "External id": 293568,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739481.632, "dur": 1.190, + "args": { + "External id": 293569,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739496.502, "dur": 0.603, + "args": { + "External id": 293570,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739509.745, "dur": 5.094, + "args": { + "External id": 293571,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739525.165, "dur": 3.033, + "args": { + "External id": 293572,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367739658.155, "dur": 2709.230, + "args": { + "External id": 293573,"Record function id": 0, "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.30)", "pid": 2070554, "tid": 2107619, + "ts": 5333367739680.707, "dur": 1024.590, + "args": { + "External id": 293574,"Record function id": 0, "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070554, "tid": 2107619, + "ts": 5333367739694.690, "dur": 321.491, + "args": { + "External id": 293575,"Record function id": 0, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739773.618, "dur": 4.533, + "args": { + "External id": 293576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739781.211, "dur": 0.801, + "args": { + "External id": 293577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739784.077, "dur": 0.865, + "args": { + "External id": 293578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739787.916, "dur": 0.869, + "args": { + "External id": 293579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739790.376, "dur": 0.760, + "args": { + "External id": 293580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739792.584, "dur": 0.818, + "args": { + "External id": 293581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739794.909, "dur": 3.366, + "args": { + "External id": 293582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739801.267, "dur": 0.731, + "args": { + "External id": 293583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739803.533, "dur": 0.899, + "args": { + "External id": 293584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367739805.906, "dur": 4.178, + "args": { + "External id": 293585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367739827.963, "dur": 157.801, + "args": { + "External id": 293586,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367739848.415, "dur": 133.195, + "args": { + "External id": 293587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367739865.771, "dur": 12.415, + "args": { + "External id": 293588,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367739881.957, "dur": 70.084, + "args": { + "External id": 293589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367739884.179, "dur": 67.536, + "args": { + "External id": 293590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367739888.442, "dur": 7.457, + "args": { + "External id": 293591,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367739897.459, "dur": 53.618, + "args": { + "External id": 293592,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.29", "pid": 2070554, "tid": 2107619, + "ts": 5333367740094.367, "dur": 603.634, + "args": { + "External id": 293593,"Record function id": 0, "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070554, "tid": 2107619, + "ts": 5333367740109.506, "dur": 575.704, + "args": { + "External id": 293594,"Record function id": 0, "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367740159.471, "dur": 4.642, + "args": { + "External id": 293595,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367740196.443, "dur": 33.156, + "args": { + "External id": 293596,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740201.141, "dur": 1.689, + "args": { + "External id": 293597,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740205.092, "dur": 2.116, + "args": { + "External id": 293598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740208.484, "dur": 0.615, + "args": { + "External id": 293599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740211.422, "dur": 0.499, + "args": { + "External id": 293600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740213.045, "dur": 0.540, + "args": { + "External id": 293601,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740215.022, "dur": 1.311, + "args": { + "External id": 293602,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740220.376, "dur": 0.479, + "args": { + "External id": 293603,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740222.014, "dur": 0.601, + "args": { + "External id": 293604,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740225.010, "dur": 0.337, + "args": { + "External id": 293605,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367740239.339, "dur": 36.590, + "args": { + "External id": 293606,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367740306.931, "dur": 102.505, + "args": { + "External id": 293607,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367740317.471, "dur": 5.533, + "args": { + "External id": 293608,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367740327.854, "dur": 9.583, + "args": { + "External id": 293609,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367740331.971, "dur": 5.031, + "args": { + "External id": 293610,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740335.333, "dur": 0.453, + "args": { + "External id": 293611,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367740343.934, "dur": 26.070, + "args": { + "External id": 293612,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740346.339, "dur": 0.365, + "args": { + "External id": 293613,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740348.021, "dur": 0.690, + "args": { + "External id": 293614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740350.768, "dur": 0.582, + "args": { + "External id": 293615,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740352.741, "dur": 0.530, + "args": { + "External id": 293616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740354.719, "dur": 1.712, + "args": { + "External id": 293617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740358.034, "dur": 0.619, + "args": { + "External id": 293618,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740359.725, "dur": 2.624, + "args": { + "External id": 293619,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740364.759, "dur": 0.546, + "args": { + "External id": 293620,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367740366.553, "dur": 0.329, + "args": { + "External id": 293621,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367740380.606, "dur": 21.341, + "args": { + "External id": 293622,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367740453.685, "dur": 126.435, + "args": { + "External id": 293623,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367740486.960, "dur": 89.828, + "args": { + "External id": 293624,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 247, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367740496.921, "dur": 76.017, + "args": { + "External id": 293625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367740595.916, "dur": 1.961, + "args": { + "External id": 293626,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 249, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367740712.746, "dur": 1634.055, + "args": { + "External id": 293627,"Sequence number": 1209222, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 250 + } + }, + { + "ph": "f", "id": 10, "pid": 2070554, "tid": 2107619, "ts": 5333367740712.746, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367740818.555, "dur": 109.717, + "args": { + "External id": 293628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367740971.051, "dur": 36.692, + "args": { + "External id": 293629,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741025.337, "dur": 53.799, + "args": { + "External id": 293630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741088.552, "dur": 33.258, + "args": { + "External id": 293631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741127.800, "dur": 65.647, + "args": { + "External id": 293632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741205.747, "dur": 32.387, + "args": { + "External id": 293633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741244.680, "dur": 42.663, + "args": { + "External id": 293634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367741312.854, "dur": 26.321, + "args": { + "External id": 293635,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367741356.280, "dur": 29.934, + "args": { + "External id": 293636,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367741408.184, "dur": 23.920, + "args": { + "External id": 293637,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367741447.204, "dur": 15.835, + "args": { + "External id": 293638,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741473.404, "dur": 30.532, + "args": { + "External id": 293639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741507.063, "dur": 33.136, + "args": { + "External id": 293640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367741566.570, "dur": 207.335, + "args": { + "External id": 293641,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367741680.050, "dur": 6.234, + "args": { + "External id": 293642,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367741688.139, "dur": 3.927, + "args": { + "External id": 293643,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367741805.953, "dur": 26.523, + "args": { + "External id": 293644,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367741845.202, "dur": 15.447, + "args": { + "External id": 293645,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741867.898, "dur": 42.601, + "args": { + "External id": 293646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741915.993, "dur": 37.762, + "args": { + "External id": 293647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741960.193, "dur": 23.652, + "args": { + "External id": 293648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367741990.161, "dur": 30.209, + "args": { + "External id": 293649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367742026.929, "dur": 21.805, + "args": { + "External id": 293650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367742055.790, "dur": 45.150, + "args": { + "External id": 293651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367742126.788, "dur": 23.874, + "args": { + "External id": 293652,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367742186.704, "dur": 28.045, + "args": { + "External id": 293653,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367742232.090, "dur": 21.243, + "args": { + "External id": 293654,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367742270.497, "dur": 14.390, + "args": { + "External id": 293655,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367742300.527, "dur": 18.416, + "args": { + "External id": 293656,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742388.697, "dur": 13.893, + "args": { + "External id": 293657,"Record function id": 0, "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742391.997, "dur": 9.649, + "args": { + "External id": 293658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742395.926, "dur": 4.857, + "args": { + "External id": 293659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742397.066, "dur": 3.631, + "args": { + "External id": 293660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742406.371, "dur": 5.009, + "args": { + "External id": 293661,"Record function id": 0, "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742407.862, "dur": 3.041, + "args": { + "External id": 293662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742408.643, "dur": 1.793, + "args": { + "External id": 293663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742409.392, "dur": 0.915, + "args": { + "External id": 293664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742414.682, "dur": 5.060, + "args": { + "External id": 293665,"Record function id": 0, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742416.024, "dur": 3.253, + "args": { + "External id": 293666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742416.949, "dur": 1.874, + "args": { + "External id": 293667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742417.921, "dur": 0.781, + "args": { + "External id": 293668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742422.998, "dur": 6.006, + "args": { + "External id": 293669,"Record function id": 0, "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742424.204, "dur": 4.377, + "args": { + "External id": 293670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742424.902, "dur": 3.239, + "args": { + "External id": 293671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742425.332, "dur": 2.736, + "args": { + "External id": 293672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742432.118, "dur": 4.069, + "args": { + "External id": 293673,"Record function id": 0, "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742433.353, "dur": 2.369, + "args": { + "External id": 293674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742433.853, "dur": 1.277, + "args": { + "External id": 293675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742434.248, "dur": 0.788, + "args": { + "External id": 293676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742439.255, "dur": 3.737, + "args": { + "External id": 293677,"Record function id": 0, "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742440.476, "dur": 2.073, + "args": { + "External id": 293678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742441.086, "dur": 1.035, + "args": { + "External id": 293679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742441.447, "dur": 0.603, + "args": { + "External id": 293680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742446.395, "dur": 5.599, + "args": { + "External id": 293681,"Record function id": 0, "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742447.747, "dur": 3.820, + "args": { + "External id": 293682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742448.291, "dur": 2.869, + "args": { + "External id": 293683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742450.214, "dur": 0.859, + "args": { + "External id": 293684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742455.128, "dur": 3.294, + "args": { + "External id": 293685,"Record function id": 0, "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742456.193, "dur": 1.800, + "args": { + "External id": 293686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742456.646, "dur": 0.938, + "args": { + "External id": 293687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742456.939, "dur": 0.554, + "args": { + "External id": 293688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742461.448, "dur": 3.380, + "args": { + "External id": 293689,"Record function id": 0, "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367742462.612, "dur": 1.786, + "args": { + "External id": 293690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742463.073, "dur": 0.920, + "args": { + "External id": 293691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367742463.346, "dur": 0.577, + "args": { + "External id": 293692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367742468.443, "dur": 36457.083, + "args": { + "External id": 293693,"Record function id": 0, "Sequence number": 1209221, "Fwd thread id": 1, "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367742469.546, "dur": 36447.561, + "args": { + "External id": 293694,"Sequence number": 1209221, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 317 + } + }, + { + "ph": "f", "id": 11, "pid": 2070554, "tid": 2107619, "ts": 5333367742469.546, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.30)", "pid": 2070554, "tid": 2107619, + "ts": 5333367742499.677, "dur": 37.733, + "args": { + "External id": 293695,"Record function id": 0, "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.30)", "pid": 2070554, "tid": 2107619, + "ts": 5333367742545.081, "dur": 59.886, + "args": { + "External id": 293696,"Record function id": 0, "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.30)", "pid": 2070554, "tid": 2107619, + "ts": 5333367742611.990, "dur": 36297.731, + "args": { + "External id": 293697,"Record function id": 0, "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367742738.264, "dur": 7.366, + "args": { + "External id": 293698,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367742756.086, "dur": 5.227, + "args": { + "External id": 293699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367742776.126, "dur": 35308.748, + "args": { + "External id": 293700,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367742789.518, "dur": 35286.432, + "args": { + "External id": 293701,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367742828.524, "dur": 15.719, + "args": { + "External id": 293702,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367742851.779, "dur": 35187.137, + "args": { + "External id": 293703,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367742855.276, "dur": 35183.004, + "args": { + "External id": 293704,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367742859.038, "dur": 4.837, + "args": { + "External id": 293705,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367742865.567, "dur": 35169.303, + "args": { + "External id": 293706,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367778181.198, "dur": 9.942, + "args": { + "External id": 293707,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367778184.310, "dur": 6.347, + "args": { + "External id": 293708,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367778222.293, "dur": 350.516, + "args": { + "External id": 293709,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367778252.058, "dur": 316.365, + "args": { + "External id": 293710,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 333, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367778264.293, "dur": 298.705, + "args": { + "External id": 293711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367778592.567, "dur": 2.484, + "args": { + "External id": 293712,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 335, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778687.900, "dur": 6.922, + "args": { + "External id": 293713,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778741.641, "dur": 1.305, + "args": { + "External id": 293714,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778759.807, "dur": 1.234, + "args": { + "External id": 293715,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778776.270, "dur": 1.018, + "args": { + "External id": 293716,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778789.849, "dur": 0.845, + "args": { + "External id": 293717,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778802.361, "dur": 0.705, + "args": { + "External id": 293718,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778816.318, "dur": 0.778, + "args": { + "External id": 293719,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778831.462, "dur": 1.676, + "args": { + "External id": 293720,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367778843.560, "dur": 0.607, + "args": { + "External id": 293721,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367778939.942, "dur": 2761.064, + "args": { + "External id": 293722,"Record function id": 0, "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.29)", "pid": 2070554, "tid": 2107619, + "ts": 5333367778958.639, "dur": 1029.096, + "args": { + "External id": 293723,"Record function id": 0, "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070554, "tid": 2107619, + "ts": 5333367778976.819, "dur": 337.794, + "args": { + "External id": 293724,"Record function id": 0, "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779050.174, "dur": 4.023, + "args": { + "External id": 293725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779057.513, "dur": 0.840, + "args": { + "External id": 293726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779060.078, "dur": 1.101, + "args": { + "External id": 293727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779064.478, "dur": 2.327, + "args": { + "External id": 293728,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779068.118, "dur": 0.667, + "args": { + "External id": 293729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779070.192, "dur": 0.895, + "args": { + "External id": 293730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779072.647, "dur": 2.023, + "args": { + "External id": 293731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779077.731, "dur": 0.599, + "args": { + "External id": 293732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779079.780, "dur": 0.559, + "args": { + "External id": 293733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367779084.950, "dur": 0.693, + "args": { + "External id": 293734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367779103.431, "dur": 180.359, + "args": { + "External id": 293735,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367779118.341, "dur": 160.769, + "args": { + "External id": 293736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367779137.282, "dur": 16.064, + "args": { + "External id": 293737,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367779156.772, "dur": 90.456, + "args": { + "External id": 293738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367779159.031, "dur": 87.874, + "args": { + "External id": 293739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779162.883, "dur": 23.099, + "args": { + "External id": 293740,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367779188.482, "dur": 57.330, + "args": { + "External id": 293741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.28", "pid": 2070554, "tid": 2107619, + "ts": 5333367779397.077, "dur": 582.714, + "args": { + "External id": 293742,"Record function id": 0, "Ev Idx": 365 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070554, "tid": 2107619, + "ts": 5333367779414.135, "dur": 554.223, + "args": { + "External id": 293743,"Record function id": 0, "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367779470.875, "dur": 4.983, + "args": { + "External id": 293744,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367779490.335, "dur": 28.724, + "args": { + "External id": 293745,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779494.806, "dur": 1.716, + "args": { + "External id": 293746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779499.057, "dur": 0.412, + "args": { + "External id": 293747,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779500.670, "dur": 0.396, + "args": { + "External id": 293748,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779502.254, "dur": 1.515, + "args": { + "External id": 293749,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779505.130, "dur": 0.333, + "args": { + "External id": 293750,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779506.746, "dur": 0.436, + "args": { + "External id": 293751,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779509.977, "dur": 2.100, + "args": { + "External id": 293752,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779513.032, "dur": 0.490, + "args": { + "External id": 293753,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779514.741, "dur": 0.414, + "args": { + "External id": 293754,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367779528.997, "dur": 29.860, + "args": { + "External id": 293755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367779589.165, "dur": 142.888, + "args": { + "External id": 293756,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367779598.867, "dur": 3.001, + "args": { + "External id": 293757,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367779606.754, "dur": 11.116, + "args": { + "External id": 293758,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367779610.703, "dur": 6.659, + "args": { + "External id": 293759,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779614.152, "dur": 1.754, + "args": { + "External id": 293760,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367779661.360, "dur": 29.129, + "args": { + "External id": 293761,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779664.039, "dur": 0.721, + "args": { + "External id": 293762,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779666.429, "dur": 0.864, + "args": { + "External id": 293763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779668.682, "dur": 0.421, + "args": { + "External id": 293764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779671.611, "dur": 2.076, + "args": { + "External id": 293765,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779675.051, "dur": 0.339, + "args": { + "External id": 293766,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779676.679, "dur": 1.234, + "args": { + "External id": 293767,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779679.246, "dur": 0.374, + "args": { + "External id": 293768,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779682.880, "dur": 0.367, + "args": { + "External id": 293769,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367779685.837, "dur": 0.327, + "args": { + "External id": 293770,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367779701.215, "dur": 23.002, + "args": { + "External id": 293771,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367779777.872, "dur": 122.610, + "args": { + "External id": 293772,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367779806.819, "dur": 90.256, + "args": { + "External id": 293773,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 396, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367779816.696, "dur": 76.319, + "args": { + "External id": 293774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367779915.675, "dur": 1.966, + "args": { + "External id": 293775,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 398, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367779994.771, "dur": 1684.387, + "args": { + "External id": 293776,"Sequence number": 1209220, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 399 + } + }, + { + "ph": "f", "id": 12, "pid": 2070554, "tid": 2107619, "ts": 5333367779994.771, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780098.883, "dur": 124.212, + "args": { + "External id": 293777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367780266.159, "dur": 38.210, + "args": { + "External id": 293778,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780320.932, "dur": 54.168, + "args": { + "External id": 293779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780385.011, "dur": 33.014, + "args": { + "External id": 293780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780428.078, "dur": 45.536, + "args": { + "External id": 293781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780479.885, "dur": 27.363, + "args": { + "External id": 293782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780514.270, "dur": 42.076, + "args": { + "External id": 293783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367780578.911, "dur": 22.829, + "args": { + "External id": 293784,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367780657.559, "dur": 32.448, + "args": { + "External id": 293785,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367780716.349, "dur": 19.272, + "args": { + "External id": 293786,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367780749.028, "dur": 15.516, + "args": { + "External id": 293787,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780775.140, "dur": 34.972, + "args": { + "External id": 293788,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367780813.461, "dur": 35.700, + "args": { + "External id": 293789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367780883.537, "dur": 173.715, + "args": { + "External id": 293790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367780967.512, "dur": 5.467, + "args": { + "External id": 293791,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367780974.792, "dur": 2.382, + "args": { + "External id": 293792,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367781091.552, "dur": 24.974, + "args": { + "External id": 293793,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367781127.765, "dur": 14.687, + "args": { + "External id": 293794,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367781149.592, "dur": 54.226, + "args": { + "External id": 293795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367781211.930, "dur": 47.592, + "args": { + "External id": 293796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367781269.978, "dur": 23.790, + "args": { + "External id": 293797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367781297.928, "dur": 30.141, + "args": { + "External id": 293798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367781333.430, "dur": 25.265, + "args": { + "External id": 293799,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367781364.594, "dur": 44.454, + "args": { + "External id": 293800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367781437.585, "dur": 24.262, + "args": { + "External id": 293801,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367781480.145, "dur": 29.389, + "args": { + "External id": 293802,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367781524.082, "dur": 16.288, + "args": { + "External id": 293803,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367781561.848, "dur": 14.406, + "args": { + "External id": 293804,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367781590.132, "dur": 16.185, + "args": { + "External id": 293805,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781723.726, "dur": 17.562, + "args": { + "External id": 293806,"Record function id": 0, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781726.991, "dur": 13.346, + "args": { + "External id": 293807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781731.175, "dur": 8.294, + "args": { + "External id": 293808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781732.391, "dur": 6.987, + "args": { + "External id": 293809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781745.142, "dur": 4.431, + "args": { + "External id": 293810,"Record function id": 0, "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781746.398, "dur": 2.733, + "args": { + "External id": 293811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781747.138, "dur": 1.540, + "args": { + "External id": 293812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781747.694, "dur": 0.794, + "args": { + "External id": 293813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781752.832, "dur": 6.212, + "args": { + "External id": 293814,"Record function id": 0, "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781754.109, "dur": 4.446, + "args": { + "External id": 293815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781754.616, "dur": 3.468, + "args": { + "External id": 293816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781755.212, "dur": 2.753, + "args": { + "External id": 293817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781762.207, "dur": 3.979, + "args": { + "External id": 293818,"Record function id": 0, "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781763.527, "dur": 2.219, + "args": { + "External id": 293819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781764.034, "dur": 1.241, + "args": { + "External id": 293820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781764.452, "dur": 0.728, + "args": { + "External id": 293821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781769.289, "dur": 3.663, + "args": { + "External id": 293822,"Record function id": 0, "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781770.385, "dur": 2.144, + "args": { + "External id": 293823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781770.953, "dur": 1.030, + "args": { + "External id": 293824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781771.287, "dur": 0.601, + "args": { + "External id": 293825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781776.088, "dur": 3.616, + "args": { + "External id": 293826,"Record function id": 0, "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781777.138, "dur": 2.151, + "args": { + "External id": 293827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781777.602, "dur": 1.269, + "args": { + "External id": 293828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781778.139, "dur": 0.614, + "args": { + "External id": 293829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781782.898, "dur": 3.456, + "args": { + "External id": 293830,"Record function id": 0, "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781784.008, "dur": 1.854, + "args": { + "External id": 293831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781784.496, "dur": 0.961, + "args": { + "External id": 293832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781784.766, "dur": 0.590, + "args": { + "External id": 293833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781789.485, "dur": 3.670, + "args": { + "External id": 293834,"Record function id": 0, "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781790.798, "dur": 1.935, + "args": { + "External id": 293835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781791.293, "dur": 1.036, + "args": { + "External id": 293836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781791.578, "dur": 0.650, + "args": { + "External id": 293837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781796.266, "dur": 5.271, + "args": { + "External id": 293838,"Record function id": 0, "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367781797.482, "dur": 3.623, + "args": { + "External id": 293839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781797.997, "dur": 2.699, + "args": { + "External id": 293840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367781799.986, "dur": 0.636, + "args": { + "External id": 293841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367781806.092, "dur": 36511.329, + "args": { + "External id": 293842,"Record function id": 0, "Sequence number": 1209219, "Fwd thread id": 1, "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367781807.370, "dur": 36501.898, + "args": { + "External id": 293843,"Sequence number": 1209219, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 466 + } + }, + { + "ph": "f", "id": 13, "pid": 2070554, "tid": 2107619, "ts": 5333367781807.370, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.29)", "pid": 2070554, "tid": 2107619, + "ts": 5333367781838.702, "dur": 37.774, + "args": { + "External id": 293844,"Record function id": 0, "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.29)", "pid": 2070554, "tid": 2107619, + "ts": 5333367781884.732, "dur": 59.091, + "args": { + "External id": 293845,"Record function id": 0, "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.29)", "pid": 2070554, "tid": 2107619, + "ts": 5333367781949.646, "dur": 36351.214, + "args": { + "External id": 293846,"Record function id": 0, "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367782036.777, "dur": 6.748, + "args": { + "External id": 293847,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367782052.809, "dur": 6.161, + "args": { + "External id": 293848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367782075.817, "dur": 35387.899, + "args": { + "External id": 293849,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367782089.005, "dur": 35365.786, + "args": { + "External id": 293850,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367782122.286, "dur": 13.763, + "args": { + "External id": 293851,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367782142.486, "dur": 35272.617, + "args": { + "External id": 293852,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367782144.820, "dur": 35269.621, + "args": { + "External id": 293853,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367782150.150, "dur": 4.609, + "args": { + "External id": 293854,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367782156.377, "dur": 35254.728, + "args": { + "External id": 293855,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367817556.778, "dur": 8.772, + "args": { + "External id": 293856,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367817559.487, "dur": 5.714, + "args": { + "External id": 293857,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367817593.428, "dur": 371.900, + "args": { + "External id": 293858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367817645.356, "dur": 314.913, + "args": { + "External id": 293859,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 482, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367817659.429, "dur": 296.024, + "args": { + "External id": 293860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367817987.901, "dur": 2.355, + "args": { + "External id": 293861,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 484, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818056.644, "dur": 6.244, + "args": { + "External id": 293862,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818108.905, "dur": 1.207, + "args": { + "External id": 293863,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818126.588, "dur": 3.217, + "args": { + "External id": 293864,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818146.479, "dur": 0.744, + "args": { + "External id": 293865,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818160.139, "dur": 0.978, + "args": { + "External id": 293866,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818195.715, "dur": 1.509, + "args": { + "External id": 293867,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818210.116, "dur": 2.662, + "args": { + "External id": 293868,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818225.040, "dur": 1.868, + "args": { + "External id": 293869,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818237.564, "dur": 0.771, + "args": { + "External id": 293870,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367818332.840, "dur": 2786.037, + "args": { + "External id": 293871,"Record function id": 0, "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.28)", "pid": 2070554, "tid": 2107619, + "ts": 5333367818353.501, "dur": 1047.275, + "args": { + "External id": 293872,"Record function id": 0, "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070554, "tid": 2107619, + "ts": 5333367818368.201, "dur": 382.672, + "args": { + "External id": 293873,"Record function id": 0, "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818441.627, "dur": 4.019, + "args": { + "External id": 293874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818448.659, "dur": 0.837, + "args": { + "External id": 293875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818451.400, "dur": 2.256, + "args": { + "External id": 293876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818455.716, "dur": 0.798, + "args": { + "External id": 293877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818457.908, "dur": 0.609, + "args": { + "External id": 293878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818461.502, "dur": 0.713, + "args": { + "External id": 293879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818464.076, "dur": 1.558, + "args": { + "External id": 293880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818467.112, "dur": 0.534, + "args": { + "External id": 293881,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818469.205, "dur": 0.738, + "args": { + "External id": 293882,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367818473.064, "dur": 0.773, + "args": { + "External id": 293883,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367818494.779, "dur": 218.731, + "args": { + "External id": 293884,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367818509.301, "dur": 198.570, + "args": { + "External id": 293885,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367818527.142, "dur": 14.040, + "args": { + "External id": 293886,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367818544.299, "dur": 131.425, + "args": { + "External id": 293887,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367818546.675, "dur": 128.669, + "args": { + "External id": 293888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818568.947, "dur": 6.979, + "args": { + "External id": 293889,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367818577.713, "dur": 96.304, + "args": { + "External id": 293890,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.27", "pid": 2070554, "tid": 2107619, + "ts": 5333367818830.592, "dur": 562.529, + "args": { + "External id": 293891,"Record function id": 0, "Ev Idx": 514 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070554, "tid": 2107619, + "ts": 5333367818848.297, "dur": 532.862, + "args": { + "External id": 293892,"Record function id": 0, "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367818904.246, "dur": 5.843, + "args": { + "External id": 293893,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367818925.393, "dur": 31.815, + "args": { + "External id": 293894,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818930.092, "dur": 1.727, + "args": { + "External id": 293895,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818934.051, "dur": 2.285, + "args": { + "External id": 293896,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818937.534, "dur": 0.580, + "args": { + "External id": 293897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818939.752, "dur": 0.401, + "args": { + "External id": 293898,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818942.823, "dur": 0.409, + "args": { + "External id": 293899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818944.785, "dur": 2.247, + "args": { + "External id": 293900,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818948.060, "dur": 0.423, + "args": { + "External id": 293901,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818951.110, "dur": 0.242, + "args": { + "External id": 293902,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367818952.661, "dur": 0.377, + "args": { + "External id": 293903,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367818967.189, "dur": 29.091, + "args": { + "External id": 293904,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367819027.367, "dur": 100.290, + "args": { + "External id": 293905,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367819037.024, "dur": 3.115, + "args": { + "External id": 293906,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367819045.055, "dur": 12.060, + "args": { + "External id": 293907,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367819051.241, "dur": 5.446, + "args": { + "External id": 293908,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819054.753, "dur": 0.622, + "args": { + "External id": 293909,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367819063.601, "dur": 24.115, + "args": { + "External id": 293910,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819065.332, "dur": 0.331, + "args": { + "External id": 293911,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819068.025, "dur": 0.693, + "args": { + "External id": 293912,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819070.182, "dur": 2.037, + "args": { + "External id": 293913,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819073.541, "dur": 1.652, + "args": { + "External id": 293914,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819076.740, "dur": 0.462, + "args": { + "External id": 293915,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819078.578, "dur": 0.324, + "args": { + "External id": 293916,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819081.355, "dur": 0.362, + "args": { + "External id": 293917,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819082.953, "dur": 0.295, + "args": { + "External id": 293918,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367819084.439, "dur": 0.181, + "args": { + "External id": 293919,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367819100.957, "dur": 19.529, + "args": { + "External id": 293920,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367819188.859, "dur": 125.526, + "args": { + "External id": 293921,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367819216.565, "dur": 94.604, + "args": { + "External id": 293922,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 545, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367819229.398, "dur": 77.379, + "args": { + "External id": 293923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367819329.565, "dur": 1.930, + "args": { + "External id": 293924,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 547, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367819407.616, "dur": 1690.346, + "args": { + "External id": 293925,"Sequence number": 1209218, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 548 + } + }, + { + "ph": "f", "id": 14, "pid": 2070554, "tid": 2107619, "ts": 5333367819407.616, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367819512.182, "dur": 102.167, + "args": { + "External id": 293926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367819697.345, "dur": 41.116, + "args": { + "External id": 293927,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367819763.875, "dur": 57.763, + "args": { + "External id": 293928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367819833.616, "dur": 34.566, + "args": { + "External id": 293929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367819874.094, "dur": 45.789, + "args": { + "External id": 293930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367819926.109, "dur": 28.049, + "args": { + "External id": 293931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367819965.092, "dur": 42.430, + "args": { + "External id": 293932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367820039.936, "dur": 21.933, + "args": { + "External id": 293933,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367820080.659, "dur": 29.080, + "args": { + "External id": 293934,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367820130.195, "dur": 19.438, + "args": { + "External id": 293935,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367820162.488, "dur": 32.128, + "args": { + "External id": 293936,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820204.798, "dur": 34.803, + "args": { + "External id": 293937,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820242.904, "dur": 36.889, + "args": { + "External id": 293938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367820312.883, "dur": 165.919, + "args": { + "External id": 293939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367820388.392, "dur": 5.407, + "args": { + "External id": 293940,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367820395.745, "dur": 2.610, + "args": { + "External id": 293941,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367820510.826, "dur": 26.472, + "args": { + "External id": 293942,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367820551.311, "dur": 14.709, + "args": { + "External id": 293943,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820573.605, "dur": 40.344, + "args": { + "External id": 293944,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820655.046, "dur": 44.665, + "args": { + "External id": 293945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820710.891, "dur": 37.693, + "args": { + "External id": 293946,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820758.663, "dur": 37.078, + "args": { + "External id": 293947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820802.948, "dur": 24.032, + "args": { + "External id": 293948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367820833.699, "dur": 29.151, + "args": { + "External id": 293949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367820891.036, "dur": 25.242, + "args": { + "External id": 293950,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367820937.802, "dur": 28.161, + "args": { + "External id": 293951,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367820981.254, "dur": 18.707, + "args": { + "External id": 293952,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367821017.749, "dur": 14.540, + "args": { + "External id": 293953,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367821045.208, "dur": 24.490, + "args": { + "External id": 293954,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821141.027, "dur": 14.715, + "args": { + "External id": 293955,"Record function id": 0, "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821144.670, "dur": 10.143, + "args": { + "External id": 293956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821148.493, "dur": 5.458, + "args": { + "External id": 293957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821149.790, "dur": 4.024, + "args": { + "External id": 293958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821159.719, "dur": 22.804, + "args": { + "External id": 293959,"Record function id": 0, "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821161.402, "dur": 19.564, + "args": { + "External id": 293960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821162.206, "dur": 3.262, + "args": { + "External id": 293961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821162.914, "dur": 2.468, + "args": { + "External id": 293962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821188.188, "dur": 6.052, + "args": { + "External id": 293963,"Record function id": 0, "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821189.864, "dur": 3.908, + "args": { + "External id": 293964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821191.052, "dur": 2.070, + "args": { + "External id": 293965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821191.639, "dur": 1.338, + "args": { + "External id": 293966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821197.572, "dur": 4.911, + "args": { + "External id": 293967,"Record function id": 0, "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821199.217, "dur": 2.867, + "args": { + "External id": 293968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821200.148, "dur": 1.545, + "args": { + "External id": 293969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821200.607, "dur": 1.008, + "args": { + "External id": 293970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821205.695, "dur": 4.017, + "args": { + "External id": 293971,"Record function id": 0, "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821207.285, "dur": 2.040, + "args": { + "External id": 293972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821208.006, "dur": 0.924, + "args": { + "External id": 293973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821208.278, "dur": 0.587, + "args": { + "External id": 293974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821212.757, "dur": 4.085, + "args": { + "External id": 293975,"Record function id": 0, "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821213.917, "dur": 2.524, + "args": { + "External id": 293976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821214.568, "dur": 1.393, + "args": { + "External id": 293977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821215.112, "dur": 0.777, + "args": { + "External id": 293978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821220.044, "dur": 3.401, + "args": { + "External id": 293979,"Record function id": 0, "Ev Idx": 602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821221.108, "dur": 1.925, + "args": { + "External id": 293980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821221.669, "dur": 0.978, + "args": { + "External id": 293981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821221.917, "dur": 0.625, + "args": { + "External id": 293982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821227.327, "dur": 5.422, + "args": { + "External id": 293983,"Record function id": 0, "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821228.498, "dur": 3.835, + "args": { + "External id": 293984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821228.942, "dur": 2.947, + "args": { + "External id": 293985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821231.136, "dur": 0.653, + "args": { + "External id": 293986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821235.860, "dur": 5.062, + "args": { + "External id": 293987,"Record function id": 0, "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367821237.216, "dur": 3.296, + "args": { + "External id": 293988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821237.681, "dur": 2.434, + "args": { + "External id": 293989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367821237.943, "dur": 2.112, + "args": { + "External id": 293990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367821244.991, "dur": 38810.430, + "args": { + "External id": 293991,"Record function id": 0, "Sequence number": 1209217, "Fwd thread id": 1, "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367821246.288, "dur": 38800.748, + "args": { + "External id": 293992,"Sequence number": 1209217, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 615 + } + }, + { + "ph": "f", "id": 15, "pid": 2070554, "tid": 2107619, "ts": 5333367821246.288, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.28)", "pid": 2070554, "tid": 2107619, + "ts": 5333367821279.601, "dur": 39.976, + "args": { + "External id": 293993,"Record function id": 0, "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.28)", "pid": 2070554, "tid": 2107619, + "ts": 5333367821327.606, "dur": 60.369, + "args": { + "External id": 293994,"Record function id": 0, "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.28)", "pid": 2070554, "tid": 2107619, + "ts": 5333367821393.801, "dur": 38645.809, + "args": { + "External id": 293995,"Record function id": 0, "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367821479.994, "dur": 7.325, + "args": { + "External id": 293996,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367821500.997, "dur": 4.771, + "args": { + "External id": 293997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367821522.740, "dur": 37725.971, + "args": { + "External id": 293998,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367821536.023, "dur": 37704.158, + "args": { + "External id": 293999,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367821572.224, "dur": 13.989, + "args": { + "External id": 294000,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367821592.772, "dur": 37609.073, + "args": { + "External id": 294001,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367821596.241, "dur": 37604.975, + "args": { + "External id": 294002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367821600.510, "dur": 5.546, + "args": { + "External id": 294003,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367821607.725, "dur": 37590.091, + "args": { + "External id": 294004,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367859332.625, "dur": 9.493, + "args": { + "External id": 294005,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367859335.629, "dur": 6.123, + "args": { + "External id": 294006,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367859369.718, "dur": 366.349, + "args": { + "External id": 294007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367859398.651, "dur": 332.146, + "args": { + "External id": 294008,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 631, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367859409.954, "dur": 314.667, + "args": { + "External id": 294009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367859759.713, "dur": 3.262, + "args": { + "External id": 294010,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 633, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859823.503, "dur": 6.553, + "args": { + "External id": 294011,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859876.405, "dur": 3.188, + "args": { + "External id": 294012,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859895.972, "dur": 1.275, + "args": { + "External id": 294013,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859909.269, "dur": 0.807, + "args": { + "External id": 294014,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859924.050, "dur": 0.861, + "args": { + "External id": 294015,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859938.866, "dur": 2.396, + "args": { + "External id": 294016,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859952.612, "dur": 1.239, + "args": { + "External id": 294017,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859965.162, "dur": 1.740, + "args": { + "External id": 294018,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367859976.636, "dur": 1.048, + "args": { + "External id": 294019,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367860069.320, "dur": 2717.141, + "args": { + "External id": 294020,"Record function id": 0, "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.27)", "pid": 2070554, "tid": 2107619, + "ts": 5333367860089.642, "dur": 1007.697, + "args": { + "External id": 294021,"Record function id": 0, "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070554, "tid": 2107619, + "ts": 5333367860102.422, "dur": 323.460, + "args": { + "External id": 294022,"Record function id": 0, "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860192.934, "dur": 6.457, + "args": { + "External id": 294023,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860202.978, "dur": 1.003, + "args": { + "External id": 294024,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860205.655, "dur": 0.816, + "args": { + "External id": 294025,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860208.038, "dur": 0.760, + "args": { + "External id": 294026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860210.229, "dur": 0.548, + "args": { + "External id": 294027,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860213.857, "dur": 0.799, + "args": { + "External id": 294028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860216.157, "dur": 1.748, + "args": { + "External id": 294029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860219.103, "dur": 0.873, + "args": { + "External id": 294030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860221.209, "dur": 2.529, + "args": { + "External id": 294031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367860227.310, "dur": 0.702, + "args": { + "External id": 294032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367860246.720, "dur": 149.909, + "args": { + "External id": 294033,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367860262.694, "dur": 129.378, + "args": { + "External id": 294034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367860280.127, "dur": 12.379, + "args": { + "External id": 294035,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367860295.923, "dur": 67.828, + "args": { + "External id": 294036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367860298.913, "dur": 64.476, + "args": { + "External id": 294037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860304.220, "dur": 6.324, + "args": { + "External id": 294038,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367860312.163, "dur": 50.563, + "args": { + "External id": 294039,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.26", "pid": 2070554, "tid": 2107619, + "ts": 5333367860503.192, "dur": 586.887, + "args": { + "External id": 294040,"Record function id": 0, "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070554, "tid": 2107619, + "ts": 5333367860519.689, "dur": 558.542, + "args": { + "External id": 294041,"Record function id": 0, "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367860571.199, "dur": 4.625, + "args": { + "External id": 294042,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367860591.067, "dur": 67.368, + "args": { + "External id": 294043,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860595.963, "dur": 1.450, + "args": { + "External id": 294044,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860599.291, "dur": 1.373, + "args": { + "External id": 294045,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860602.016, "dur": 0.562, + "args": { + "External id": 294046,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860604.007, "dur": 2.378, + "args": { + "External id": 294047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860608.692, "dur": 0.251, + "args": { + "External id": 294048,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860610.522, "dur": 0.479, + "args": { + "External id": 294049,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860611.964, "dur": 0.697, + "args": { + "External id": 294050,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860614.996, "dur": 0.569, + "args": { + "External id": 294051,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860617.132, "dur": 0.540, + "args": { + "External id": 294052,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367860670.641, "dur": 36.712, + "args": { + "External id": 294053,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367860740.611, "dur": 109.928, + "args": { + "External id": 294054,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367860751.842, "dur": 4.518, + "args": { + "External id": 294055,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367860763.961, "dur": 12.499, + "args": { + "External id": 294056,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367860769.885, "dur": 6.123, + "args": { + "External id": 294057,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860773.875, "dur": 0.874, + "args": { + "External id": 294058,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367860784.019, "dur": 27.166, + "args": { + "External id": 294059,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860786.143, "dur": 2.040, + "args": { + "External id": 294060,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860791.177, "dur": 0.332, + "args": { + "External id": 294061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860792.687, "dur": 0.481, + "args": { + "External id": 294062,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860794.370, "dur": 1.954, + "args": { + "External id": 294063,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860797.372, "dur": 0.550, + "args": { + "External id": 294064,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860799.004, "dur": 0.296, + "args": { + "External id": 294065,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860801.864, "dur": 0.573, + "args": { + "External id": 294066,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860803.766, "dur": 0.466, + "args": { + "External id": 294067,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367860805.475, "dur": 2.070, + "args": { + "External id": 294068,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367860821.659, "dur": 20.914, + "args": { + "External id": 294069,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367860894.880, "dur": 118.526, + "args": { + "External id": 294070,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367860921.463, "dur": 88.386, + "args": { + "External id": 294071,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 694, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367860931.304, "dur": 74.500, + "args": { + "External id": 294072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367861028.316, "dur": 1.780, + "args": { + "External id": 294073,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 696, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367861104.519, "dur": 1661.690, + "args": { + "External id": 294074,"Sequence number": 1209216, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 697 + } + }, + { + "ph": "f", "id": 16, "pid": 2070554, "tid": 2107619, "ts": 5333367861104.519, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861231.433, "dur": 107.606, + "args": { + "External id": 294075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367861382.322, "dur": 37.058, + "args": { + "External id": 294076,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861435.980, "dur": 49.887, + "args": { + "External id": 294077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861495.003, "dur": 33.028, + "args": { + "External id": 294078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861534.055, "dur": 45.928, + "args": { + "External id": 294079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861585.780, "dur": 27.350, + "args": { + "External id": 294080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861660.831, "dur": 49.933, + "args": { + "External id": 294081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367861739.784, "dur": 24.541, + "args": { + "External id": 294082,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367861782.784, "dur": 27.357, + "args": { + "External id": 294083,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367861828.638, "dur": 19.558, + "args": { + "External id": 294084,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367861863.341, "dur": 17.204, + "args": { + "External id": 294085,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861897.250, "dur": 31.708, + "args": { + "External id": 294086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367861931.988, "dur": 32.867, + "args": { + "External id": 294087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367861990.384, "dur": 167.351, + "args": { + "External id": 294088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367862066.677, "dur": 6.695, + "args": { + "External id": 294089,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367862075.146, "dur": 2.764, + "args": { + "External id": 294090,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367862211.676, "dur": 27.683, + "args": { + "External id": 294091,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367862251.199, "dur": 15.024, + "args": { + "External id": 294092,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367862274.122, "dur": 41.377, + "args": { + "External id": 294093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367862323.444, "dur": 36.639, + "args": { + "External id": 294094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367862366.682, "dur": 22.161, + "args": { + "External id": 294095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367862393.074, "dur": 29.490, + "args": { + "External id": 294096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367862429.806, "dur": 21.770, + "args": { + "External id": 294097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367862458.878, "dur": 28.803, + "args": { + "External id": 294098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367862508.297, "dur": 21.754, + "args": { + "External id": 294099,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367862545.560, "dur": 38.831, + "args": { + "External id": 294100,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367862608.464, "dur": 52.791, + "args": { + "External id": 294101,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367862683.066, "dur": 21.202, + "args": { + "External id": 294102,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367862717.815, "dur": 16.372, + "args": { + "External id": 294103,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862808.096, "dur": 17.191, + "args": { + "External id": 294104,"Record function id": 0, "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862811.732, "dur": 12.573, + "args": { + "External id": 294105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862816.079, "dur": 7.309, + "args": { + "External id": 294106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862817.573, "dur": 5.724, + "args": { + "External id": 294107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862829.165, "dur": 4.908, + "args": { + "External id": 294108,"Record function id": 0, "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862830.865, "dur": 2.780, + "args": { + "External id": 294109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862831.691, "dur": 1.559, + "args": { + "External id": 294110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862832.195, "dur": 0.920, + "args": { + "External id": 294111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862837.299, "dur": 4.487, + "args": { + "External id": 294112,"Record function id": 0, "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862838.535, "dur": 2.830, + "args": { + "External id": 294113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862839.466, "dur": 1.542, + "args": { + "External id": 294114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862840.138, "dur": 0.785, + "args": { + "External id": 294115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862844.941, "dur": 4.225, + "args": { + "External id": 294116,"Record function id": 0, "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862846.497, "dur": 2.252, + "args": { + "External id": 294117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862846.973, "dur": 1.409, + "args": { + "External id": 294118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862847.459, "dur": 0.812, + "args": { + "External id": 294119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862852.193, "dur": 6.090, + "args": { + "External id": 294120,"Record function id": 0, "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862853.641, "dur": 4.173, + "args": { + "External id": 294121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862854.464, "dur": 2.831, + "args": { + "External id": 294122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862856.194, "dur": 0.979, + "args": { + "External id": 294123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862861.372, "dur": 3.571, + "args": { + "External id": 294124,"Record function id": 0, "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862862.464, "dur": 2.049, + "args": { + "External id": 294125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862863.002, "dur": 1.121, + "args": { + "External id": 294126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862863.354, "dur": 0.702, + "args": { + "External id": 294127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862868.320, "dur": 3.227, + "args": { + "External id": 294128,"Record function id": 0, "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862869.419, "dur": 1.684, + "args": { + "External id": 294129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862869.860, "dur": 0.856, + "args": { + "External id": 294130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862870.109, "dur": 0.534, + "args": { + "External id": 294131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862874.672, "dur": 5.058, + "args": { + "External id": 294132,"Record function id": 0, "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862875.715, "dur": 3.557, + "args": { + "External id": 294133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862876.173, "dur": 2.735, + "args": { + "External id": 294134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862876.630, "dur": 2.207, + "args": { + "External id": 294135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862882.824, "dur": 5.009, + "args": { + "External id": 294136,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367862884.064, "dur": 3.257, + "args": { + "External id": 294137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862884.522, "dur": 2.406, + "args": { + "External id": 294138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367862886.203, "dur": 0.637, + "args": { + "External id": 294139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367862891.520, "dur": 38607.760, + "args": { + "External id": 294140,"Record function id": 0, "Sequence number": 1209215, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367862892.548, "dur": 38594.966, + "args": { + "External id": 294141,"Sequence number": 1209215, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 17, "pid": 2070554, "tid": 2107619, "ts": 5333367862892.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.27)", "pid": 2070554, "tid": 2107619, + "ts": 5333367862920.567, "dur": 34.367, + "args": { + "External id": 294142,"Record function id": 0, "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.27)", "pid": 2070554, "tid": 2107619, + "ts": 5333367862962.003, "dur": 58.590, + "args": { + "External id": 294143,"Record function id": 0, "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.27)", "pid": 2070554, "tid": 2107619, + "ts": 5333367863025.576, "dur": 38454.574, + "args": { + "External id": 294144,"Record function id": 0, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367863109.167, "dur": 6.264, + "args": { + "External id": 294145,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367863125.241, "dur": 4.549, + "args": { + "External id": 294146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367863143.101, "dur": 37549.033, + "args": { + "External id": 294147,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367863159.025, "dur": 37523.684, + "args": { + "External id": 294148,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367863213.180, "dur": 14.432, + "args": { + "External id": 294149,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367863233.979, "dur": 37409.811, + "args": { + "External id": 294150,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367863236.502, "dur": 37406.548, + "args": { + "External id": 294151,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367863241.007, "dur": 5.609, + "args": { + "External id": 294152,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367863248.171, "dur": 37370.313, + "args": { + "External id": 294153,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367900780.869, "dur": 9.533, + "args": { + "External id": 294154,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367900783.885, "dur": 6.175, + "args": { + "External id": 294155,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367900817.250, "dur": 367.640, + "args": { + "External id": 294156,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367900849.711, "dur": 315.078, + "args": { + "External id": 294157,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 780, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367900861.276, "dur": 296.464, + "args": { + "External id": 294158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367901208.792, "dur": 3.400, + "args": { + "External id": 294159,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 782, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901271.160, "dur": 6.512, + "args": { + "External id": 294160,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901324.496, "dur": 1.356, + "args": { + "External id": 294161,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901341.644, "dur": 1.574, + "args": { + "External id": 294162,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901355.624, "dur": 1.083, + "args": { + "External id": 294163,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901370.769, "dur": 0.764, + "args": { + "External id": 294164,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901384.345, "dur": 0.766, + "args": { + "External id": 294165,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901395.961, "dur": 0.886, + "args": { + "External id": 294166,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901408.251, "dur": 2.050, + "args": { + "External id": 294167,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901421.095, "dur": 0.666, + "args": { + "External id": 294168,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367901514.208, "dur": 2675.975, + "args": { + "External id": 294169,"Record function id": 0, "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.26)", "pid": 2070554, "tid": 2107619, + "ts": 5333367901533.191, "dur": 1000.635, + "args": { + "External id": 294170,"Record function id": 0, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070554, "tid": 2107619, + "ts": 5333367901547.630, "dur": 344.211, + "args": { + "External id": 294171,"Record function id": 0, "Ev Idx": 794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901653.142, "dur": 5.044, + "args": { + "External id": 294172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901663.570, "dur": 0.951, + "args": { + "External id": 294173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901667.804, "dur": 0.948, + "args": { + "External id": 294174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901670.155, "dur": 0.618, + "args": { + "External id": 294175,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901672.140, "dur": 0.812, + "args": { + "External id": 294176,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901674.207, "dur": 0.710, + "args": { + "External id": 294177,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901677.946, "dur": 1.805, + "args": { + "External id": 294178,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901681.512, "dur": 2.349, + "args": { + "External id": 294179,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901685.681, "dur": 0.813, + "args": { + "External id": 294180,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367901688.293, "dur": 0.875, + "args": { + "External id": 294181,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367901709.864, "dur": 153.759, + "args": { + "External id": 294182,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367901725.410, "dur": 133.591, + "args": { + "External id": 294183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367901746.232, "dur": 12.333, + "args": { + "External id": 294184,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367901762.083, "dur": 68.584, + "args": { + "External id": 294185,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367901764.556, "dur": 65.797, + "args": { + "External id": 294186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367901769.340, "dur": 5.311, + "args": { + "External id": 294187,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367901776.088, "dur": 53.672, + "args": { + "External id": 294188,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2070554, "tid": 2107619, + "ts": 5333367901968.852, "dur": 558.005, + "args": { + "External id": 294189,"Record function id": 0, "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070554, "tid": 2107619, + "ts": 5333367901984.797, "dur": 530.304, + "args": { + "External id": 294190,"Record function id": 0, "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367902038.083, "dur": 4.437, + "args": { + "External id": 294191,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367902058.017, "dur": 27.908, + "args": { + "External id": 294192,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902062.771, "dur": 1.560, + "args": { + "External id": 294193,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902066.096, "dur": 1.498, + "args": { + "External id": 294194,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902069.003, "dur": 2.027, + "args": { + "External id": 294195,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902072.062, "dur": 0.734, + "args": { + "External id": 294196,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902075.110, "dur": 0.288, + "args": { + "External id": 294197,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902076.594, "dur": 0.715, + "args": { + "External id": 294198,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902078.102, "dur": 0.485, + "args": { + "External id": 294199,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902080.136, "dur": 0.379, + "args": { + "External id": 294200,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902081.369, "dur": 0.490, + "args": { + "External id": 294201,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367902097.336, "dur": 31.343, + "args": { + "External id": 294202,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367902159.046, "dur": 125.378, + "args": { + "External id": 294203,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367902185.508, "dur": 5.353, + "args": { + "External id": 294204,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367902196.810, "dur": 12.657, + "args": { + "External id": 294205,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367902201.058, "dur": 7.975, + "args": { + "External id": 294206,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902204.634, "dur": 2.666, + "args": { + "External id": 294207,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367902216.646, "dur": 24.446, + "args": { + "External id": 294208,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902218.784, "dur": 0.662, + "args": { + "External id": 294209,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902220.842, "dur": 1.989, + "args": { + "External id": 294210,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902224.001, "dur": 0.558, + "args": { + "External id": 294211,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902225.664, "dur": 0.684, + "args": { + "External id": 294212,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902228.468, "dur": 0.373, + "args": { + "External id": 294213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902230.028, "dur": 0.598, + "args": { + "External id": 294214,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902231.839, "dur": 0.548, + "args": { + "External id": 294215,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902233.839, "dur": 1.805, + "args": { + "External id": 294216,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367902236.439, "dur": 0.253, + "args": { + "External id": 294217,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367902254.954, "dur": 21.495, + "args": { + "External id": 294218,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367902330.429, "dur": 117.842, + "args": { + "External id": 294219,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367902356.557, "dur": 88.442, + "args": { + "External id": 294220,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 843, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367902365.860, "dur": 75.277, + "args": { + "External id": 294221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367902466.397, "dur": 1.940, + "args": { + "External id": 294222,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 845, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367902540.786, "dur": 1611.490, + "args": { + "External id": 294223,"Sequence number": 1209214, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 846 + } + }, + { + "ph": "f", "id": 18, "pid": 2070554, "tid": 2107619, "ts": 5333367902540.786, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367902683.750, "dur": 110.175, + "args": { + "External id": 294224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367902832.538, "dur": 38.373, + "args": { + "External id": 294225,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367902888.056, "dur": 50.179, + "args": { + "External id": 294226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367902947.673, "dur": 32.972, + "args": { + "External id": 294227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367902987.216, "dur": 45.779, + "args": { + "External id": 294228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903039.099, "dur": 26.867, + "args": { + "External id": 294229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903073.881, "dur": 41.668, + "args": { + "External id": 294230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367903137.322, "dur": 22.239, + "args": { + "External id": 294231,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367903194.137, "dur": 30.272, + "args": { + "External id": 294232,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367903245.658, "dur": 19.588, + "args": { + "External id": 294233,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367903277.761, "dur": 14.787, + "args": { + "External id": 294234,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903303.146, "dur": 34.422, + "args": { + "External id": 294235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903341.147, "dur": 33.015, + "args": { + "External id": 294236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367903401.280, "dur": 167.538, + "args": { + "External id": 294237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367903475.232, "dur": 5.543, + "args": { + "External id": 294238,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367903482.463, "dur": 2.933, + "args": { + "External id": 294239,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367903598.568, "dur": 64.051, + "args": { + "External id": 294240,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367903682.271, "dur": 17.022, + "args": { + "External id": 294241,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903709.268, "dur": 42.783, + "args": { + "External id": 294242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903757.869, "dur": 38.564, + "args": { + "External id": 294243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903804.483, "dur": 22.219, + "args": { + "External id": 294244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903832.674, "dur": 29.562, + "args": { + "External id": 294245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903867.123, "dur": 21.106, + "args": { + "External id": 294246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367903894.307, "dur": 28.723, + "args": { + "External id": 294247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367903941.343, "dur": 21.572, + "args": { + "External id": 294248,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367903980.126, "dur": 41.034, + "args": { + "External id": 294249,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367904040.604, "dur": 17.911, + "args": { + "External id": 294250,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367904074.081, "dur": 18.751, + "args": { + "External id": 294251,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367904105.338, "dur": 16.029, + "args": { + "External id": 294252,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904212.911, "dur": 16.447, + "args": { + "External id": 294253,"Record function id": 0, "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904216.004, "dur": 12.332, + "args": { + "External id": 294254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904220.000, "dur": 7.114, + "args": { + "External id": 294255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904221.508, "dur": 5.479, + "args": { + "External id": 294256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904233.253, "dur": 4.769, + "args": { + "External id": 294257,"Record function id": 0, "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904234.827, "dur": 2.760, + "args": { + "External id": 294258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904235.631, "dur": 1.552, + "args": { + "External id": 294259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904236.231, "dur": 0.872, + "args": { + "External id": 294260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904241.493, "dur": 3.832, + "args": { + "External id": 294261,"Record function id": 0, "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904242.433, "dur": 2.471, + "args": { + "External id": 294262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904242.936, "dur": 1.550, + "args": { + "External id": 294263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904243.455, "dur": 0.931, + "args": { + "External id": 294264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904248.426, "dur": 4.755, + "args": { + "External id": 294265,"Record function id": 0, "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904249.651, "dur": 3.081, + "args": { + "External id": 294266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904250.719, "dur": 1.627, + "args": { + "External id": 294267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904251.397, "dur": 0.852, + "args": { + "External id": 294268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904256.252, "dur": 4.039, + "args": { + "External id": 294269,"Record function id": 0, "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904257.823, "dur": 2.032, + "args": { + "External id": 294270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904258.286, "dur": 1.102, + "args": { + "External id": 294271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904258.569, "dur": 0.723, + "args": { + "External id": 294272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904263.368, "dur": 4.908, + "args": { + "External id": 294273,"Record function id": 0, "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904264.624, "dur": 3.213, + "args": { + "External id": 294274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904265.096, "dur": 2.357, + "args": { + "External id": 294275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904266.772, "dur": 0.539, + "args": { + "External id": 294276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904271.483, "dur": 3.413, + "args": { + "External id": 294277,"Record function id": 0, "Ev Idx": 900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904272.664, "dur": 1.807, + "args": { + "External id": 294278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904273.129, "dur": 0.902, + "args": { + "External id": 294279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904273.397, "dur": 0.526, + "args": { + "External id": 294280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904278.059, "dur": 5.396, + "args": { + "External id": 294281,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904278.986, "dur": 4.036, + "args": { + "External id": 294282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904279.461, "dur": 3.175, + "args": { + "External id": 294283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904279.728, "dur": 2.809, + "args": { + "External id": 294284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904286.513, "dur": 3.545, + "args": { + "External id": 294285,"Record function id": 0, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367904287.477, "dur": 2.130, + "args": { + "External id": 294286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904287.951, "dur": 1.248, + "args": { + "External id": 294287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367904288.337, "dur": 0.760, + "args": { + "External id": 294288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367904294.478, "dur": 37157.555, + "args": { + "External id": 294289,"Record function id": 0, "Sequence number": 1209213, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367904295.751, "dur": 37147.642, + "args": { + "External id": 294290,"Sequence number": 1209213, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 19, "pid": 2070554, "tid": 2107619, "ts": 5333367904295.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.26)", "pid": 2070554, "tid": 2107619, + "ts": 5333367904326.962, "dur": 38.355, + "args": { + "External id": 294291,"Record function id": 0, "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.26)", "pid": 2070554, "tid": 2107619, + "ts": 5333367904372.887, "dur": 60.100, + "args": { + "External id": 294292,"Record function id": 0, "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.26)", "pid": 2070554, "tid": 2107619, + "ts": 5333367904438.633, "dur": 36996.962, + "args": { + "External id": 294293,"Record function id": 0, "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367904527.468, "dur": 6.737, + "args": { + "External id": 294294,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367904544.279, "dur": 4.753, + "args": { + "External id": 294295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367904564.376, "dur": 36039.924, + "args": { + "External id": 294296,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367904577.311, "dur": 36017.908, + "args": { + "External id": 294297,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367904608.849, "dur": 52.474, + "args": { + "External id": 294298,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367904668.603, "dur": 35885.571, + "args": { + "External id": 294299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367904672.066, "dur": 35881.606, + "args": { + "External id": 294300,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367904677.169, "dur": 5.650, + "args": { + "External id": 294301,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367904684.674, "dur": 35865.310, + "args": { + "External id": 294302,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367940713.506, "dur": 8.790, + "args": { + "External id": 294303,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367940716.102, "dur": 5.811, + "args": { + "External id": 294304,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367940749.639, "dur": 378.576, + "args": { + "External id": 294305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367940776.440, "dur": 346.710, + "args": { + "External id": 294306,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 929, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367940788.944, "dur": 328.982, + "args": { + "External id": 294307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367941147.270, "dur": 2.356, + "args": { + "External id": 294308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 931, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941228.846, "dur": 6.964, + "args": { + "External id": 294309,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941282.796, "dur": 1.265, + "args": { + "External id": 294310,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941299.404, "dur": 1.476, + "args": { + "External id": 294311,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941311.966, "dur": 0.809, + "args": { + "External id": 294312,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941323.477, "dur": 0.891, + "args": { + "External id": 294313,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941336.753, "dur": 0.718, + "args": { + "External id": 294314,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941349.977, "dur": 1.288, + "args": { + "External id": 294315,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941362.277, "dur": 1.640, + "args": { + "External id": 294316,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941374.066, "dur": 0.615, + "args": { + "External id": 294317,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367941467.663, "dur": 2664.507, + "args": { + "External id": 294318,"Record function id": 0, "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2070554, "tid": 2107619, + "ts": 5333367941486.384, "dur": 984.193, + "args": { + "External id": 294319,"Record function id": 0, "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070554, "tid": 2107619, + "ts": 5333367941500.544, "dur": 343.512, + "args": { + "External id": 294320,"Record function id": 0, "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941572.470, "dur": 3.811, + "args": { + "External id": 294321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941579.012, "dur": 1.021, + "args": { + "External id": 294322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941581.564, "dur": 0.679, + "args": { + "External id": 294323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941584.807, "dur": 1.245, + "args": { + "External id": 294324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941587.530, "dur": 0.791, + "args": { + "External id": 294325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941589.988, "dur": 1.095, + "args": { + "External id": 294326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941592.734, "dur": 2.441, + "args": { + "External id": 294327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941597.740, "dur": 2.497, + "args": { + "External id": 294328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941601.562, "dur": 0.568, + "args": { + "External id": 294329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367941603.565, "dur": 0.789, + "args": { + "External id": 294330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367941657.296, "dur": 155.122, + "args": { + "External id": 294331,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367941673.616, "dur": 134.193, + "args": { + "External id": 294332,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367941693.929, "dur": 13.044, + "args": { + "External id": 294333,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367941710.413, "dur": 69.317, + "args": { + "External id": 294334,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367941712.866, "dur": 66.617, + "args": { + "External id": 294335,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367941717.598, "dur": 7.176, + "args": { + "External id": 294336,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367941726.298, "dur": 52.606, + "args": { + "External id": 294337,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2070554, "tid": 2107619, + "ts": 5333367941922.187, "dur": 541.213, + "args": { + "External id": 294338,"Record function id": 0, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070554, "tid": 2107619, + "ts": 5333367941937.945, "dur": 513.232, + "args": { + "External id": 294339,"Record function id": 0, "Ev Idx": 962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367941991.627, "dur": 4.464, + "args": { + "External id": 294340,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367942010.982, "dur": 25.700, + "args": { + "External id": 294341,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942015.835, "dur": 1.603, + "args": { + "External id": 294342,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942018.920, "dur": 1.466, + "args": { + "External id": 294343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942021.213, "dur": 2.333, + "args": { + "External id": 294344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942024.338, "dur": 0.525, + "args": { + "External id": 294345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942026.858, "dur": 0.475, + "args": { + "External id": 294346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942028.182, "dur": 0.331, + "args": { + "External id": 294347,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942029.402, "dur": 0.550, + "args": { + "External id": 294348,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942031.422, "dur": 0.440, + "args": { + "External id": 294349,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942033.008, "dur": 0.344, + "args": { + "External id": 294350,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367942045.224, "dur": 27.713, + "args": { + "External id": 294351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367942104.819, "dur": 115.217, + "args": { + "External id": 294352,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367942114.001, "dur": 3.129, + "args": { + "External id": 294353,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367942121.803, "dur": 11.808, + "args": { + "External id": 294354,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367942126.073, "dur": 7.106, + "args": { + "External id": 294355,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942129.372, "dur": 2.562, + "args": { + "External id": 294356,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367942139.786, "dur": 22.225, + "args": { + "External id": 294357,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942141.627, "dur": 0.558, + "args": { + "External id": 294358,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942144.268, "dur": 0.424, + "args": { + "External id": 294359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942146.072, "dur": 0.447, + "args": { + "External id": 294360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942147.794, "dur": 1.323, + "args": { + "External id": 294361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942150.538, "dur": 0.241, + "args": { + "External id": 294362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942151.687, "dur": 0.364, + "args": { + "External id": 294363,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942153.918, "dur": 0.233, + "args": { + "External id": 294364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942155.622, "dur": 1.924, + "args": { + "External id": 294365,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367942158.567, "dur": 0.268, + "args": { + "External id": 294366,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367942188.666, "dur": 23.005, + "args": { + "External id": 294367,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367942264.380, "dur": 119.872, + "args": { + "External id": 294368,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367942289.155, "dur": 91.915, + "args": { + "External id": 294369,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 992, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367942298.889, "dur": 78.140, + "args": { + "External id": 294370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367942397.643, "dur": 1.933, + "args": { + "External id": 294371,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 994, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367942477.302, "dur": 1630.541, + "args": { + "External id": 294372,"Sequence number": 1209212, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 995 + } + }, + { + "ph": "f", "id": 20, "pid": 2070554, "tid": 2107619, "ts": 5333367942477.302, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367942577.833, "dur": 146.476, + "args": { + "External id": 294373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367942766.063, "dur": 38.877, + "args": { + "External id": 294374,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367942822.236, "dur": 55.096, + "args": { + "External id": 294375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367942887.576, "dur": 33.576, + "args": { + "External id": 294376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367942927.372, "dur": 45.613, + "args": { + "External id": 294377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367942980.714, "dur": 27.228, + "args": { + "External id": 294378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943016.720, "dur": 41.672, + "args": { + "External id": 294379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367943081.183, "dur": 22.828, + "args": { + "External id": 294380,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367943121.350, "dur": 27.978, + "args": { + "External id": 294381,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367943184.355, "dur": 22.741, + "args": { + "External id": 294382,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367943222.452, "dur": 17.373, + "args": { + "External id": 294383,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943249.143, "dur": 34.482, + "args": { + "External id": 294384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943287.037, "dur": 33.345, + "args": { + "External id": 294385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367943345.316, "dur": 173.046, + "args": { + "External id": 294386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367943426.714, "dur": 5.935, + "args": { + "External id": 294387,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367943434.642, "dur": 3.843, + "args": { + "External id": 294388,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367943549.645, "dur": 25.872, + "args": { + "External id": 294389,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367943586.449, "dur": 15.961, + "args": { + "External id": 294390,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943610.052, "dur": 77.186, + "args": { + "External id": 294391,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943696.414, "dur": 38.792, + "args": { + "External id": 294392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943742.777, "dur": 22.527, + "args": { + "External id": 294393,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943771.485, "dur": 30.480, + "args": { + "External id": 294394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943809.071, "dur": 21.513, + "args": { + "External id": 294395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367943836.223, "dur": 29.332, + "args": { + "External id": 294396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367943891.156, "dur": 32.926, + "args": { + "External id": 294397,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367943946.291, "dur": 26.170, + "args": { + "External id": 294398,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367943985.590, "dur": 17.975, + "args": { + "External id": 294399,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367944020.189, "dur": 14.211, + "args": { + "External id": 294400,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367944059.806, "dur": 19.234, + "args": { + "External id": 294401,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944154.014, "dur": 30.248, + "args": { + "External id": 294402,"Record function id": 0, "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944157.073, "dur": 25.348, + "args": { + "External id": 294403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944160.764, "dur": 4.856, + "args": { + "External id": 294404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944161.946, "dur": 3.579, + "args": { + "External id": 294405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944190.793, "dur": 5.541, + "args": { + "External id": 294406,"Record function id": 0, "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944192.474, "dur": 3.418, + "args": { + "External id": 294407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944193.557, "dur": 1.861, + "args": { + "External id": 294408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944194.011, "dur": 1.317, + "args": { + "External id": 294409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944199.731, "dur": 3.613, + "args": { + "External id": 294410,"Record function id": 0, "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944200.612, "dur": 2.303, + "args": { + "External id": 294411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944201.270, "dur": 1.194, + "args": { + "External id": 294412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944201.753, "dur": 0.623, + "args": { + "External id": 294413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944206.563, "dur": 3.591, + "args": { + "External id": 294414,"Record function id": 0, "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944207.645, "dur": 2.110, + "args": { + "External id": 294415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944208.139, "dur": 1.123, + "args": { + "External id": 294416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944208.456, "dur": 0.734, + "args": { + "External id": 294417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944213.242, "dur": 3.900, + "args": { + "External id": 294418,"Record function id": 0, "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944214.594, "dur": 2.155, + "args": { + "External id": 294419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944215.072, "dur": 1.129, + "args": { + "External id": 294420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944215.654, "dur": 0.482, + "args": { + "External id": 294421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944220.223, "dur": 3.735, + "args": { + "External id": 294422,"Record function id": 0, "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944221.344, "dur": 2.212, + "args": { + "External id": 294423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944221.836, "dur": 1.298, + "args": { + "External id": 294424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944222.462, "dur": 0.600, + "args": { + "External id": 294425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944227.190, "dur": 5.660, + "args": { + "External id": 294426,"Record function id": 0, "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944228.302, "dur": 4.132, + "args": { + "External id": 294427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944228.780, "dur": 3.235, + "args": { + "External id": 294428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944229.336, "dur": 2.614, + "args": { + "External id": 294429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944235.956, "dur": 3.901, + "args": { + "External id": 294430,"Record function id": 0, "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944237.095, "dur": 2.326, + "args": { + "External id": 294431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944237.581, "dur": 1.447, + "args": { + "External id": 294432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944238.175, "dur": 0.744, + "args": { + "External id": 294433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944243.652, "dur": 3.368, + "args": { + "External id": 294434,"Record function id": 0, "Ev Idx": 1057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367944244.634, "dur": 1.987, + "args": { + "External id": 294435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944245.131, "dur": 1.052, + "args": { + "External id": 294436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367944245.489, "dur": 0.630, + "args": { + "External id": 294437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367944250.724, "dur": 35678.678, + "args": { + "External id": 294438,"Record function id": 0, "Sequence number": 1209211, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367944251.882, "dur": 35669.419, + "args": { + "External id": 294439,"Sequence number": 1209211, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 21, "pid": 2070554, "tid": 2107619, "ts": 5333367944251.882, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2070554, "tid": 2107619, + "ts": 5333367944284.176, "dur": 38.212, + "args": { + "External id": 294440,"Record function id": 0, "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2070554, "tid": 2107619, + "ts": 5333367944330.180, "dur": 61.829, + "args": { + "External id": 294441,"Record function id": 0, "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2070554, "tid": 2107619, + "ts": 5333367944397.694, "dur": 35515.725, + "args": { + "External id": 294442,"Record function id": 0, "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367944487.783, "dur": 7.056, + "args": { + "External id": 294443,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367944504.395, "dur": 4.695, + "args": { + "External id": 294444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367944523.898, "dur": 34657.025, + "args": { + "External id": 294445,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367944536.852, "dur": 34626.031, + "args": { + "External id": 294446,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367944572.638, "dur": 13.614, + "args": { + "External id": 294447,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367944592.209, "dur": 34531.863, + "args": { + "External id": 294448,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367944594.542, "dur": 34528.792, + "args": { + "External id": 294449,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367944599.178, "dur": 4.512, + "args": { + "External id": 294450,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367944605.376, "dur": 34514.436, + "args": { + "External id": 294451,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367979268.340, "dur": 8.962, + "args": { + "External id": 294452,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367979271.002, "dur": 5.973, + "args": { + "External id": 294453,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367979306.714, "dur": 285.297, + "args": { + "External id": 294454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367979330.311, "dur": 256.776, + "args": { + "External id": 294455,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1078, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367979342.111, "dur": 239.957, + "args": { + "External id": 294456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367979607.811, "dur": 1.990, + "args": { + "External id": 294457,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1080, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979704.953, "dur": 8.711, + "args": { + "External id": 294458,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979758.508, "dur": 4.385, + "args": { + "External id": 294459,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979777.140, "dur": 1.023, + "args": { + "External id": 294460,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979789.045, "dur": 0.689, + "args": { + "External id": 294461,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979800.548, "dur": 2.751, + "args": { + "External id": 294462,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979813.387, "dur": 0.946, + "args": { + "External id": 294463,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979828.229, "dur": 0.835, + "args": { + "External id": 294464,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979838.875, "dur": 1.452, + "args": { + "External id": 294465,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367979849.206, "dur": 2.038, + "args": { + "External id": 294466,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367979944.359, "dur": 2620.536, + "args": { + "External id": 294467,"Record function id": 0, "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2070554, "tid": 2107619, + "ts": 5333367979963.330, "dur": 975.469, + "args": { + "External id": 294468,"Record function id": 0, "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070554, "tid": 2107619, + "ts": 5333367979978.191, "dur": 315.417, + "args": { + "External id": 294469,"Record function id": 0, "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980050.717, "dur": 4.027, + "args": { + "External id": 294470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980058.085, "dur": 0.544, + "args": { + "External id": 294471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980060.911, "dur": 0.609, + "args": { + "External id": 294472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980063.370, "dur": 0.664, + "args": { + "External id": 294473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980065.539, "dur": 0.726, + "args": { + "External id": 294474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980067.773, "dur": 0.531, + "args": { + "External id": 294475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980070.274, "dur": 2.855, + "args": { + "External id": 294476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980074.820, "dur": 0.665, + "args": { + "External id": 294477,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980077.043, "dur": 0.569, + "args": { + "External id": 294478,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367980079.217, "dur": 0.623, + "args": { + "External id": 294479,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367980097.500, "dur": 164.295, + "args": { + "External id": 294480,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367980112.262, "dur": 144.588, + "args": { + "External id": 294481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367980127.993, "dur": 11.729, + "args": { + "External id": 294482,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367980143.901, "dur": 83.453, + "args": { + "External id": 294483,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367980146.282, "dur": 80.762, + "args": { + "External id": 294484,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980150.390, "dur": 5.845, + "args": { + "External id": 294485,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367980157.884, "dur": 68.206, + "args": { + "External id": 294486,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2070554, "tid": 2107619, + "ts": 5333367980368.101, "dur": 562.799, + "args": { + "External id": 294487,"Record function id": 0, "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070554, "tid": 2107619, + "ts": 5333367980385.339, "dur": 533.770, + "args": { + "External id": 294488,"Record function id": 0, "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367980439.121, "dur": 5.475, + "args": { + "External id": 294489,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367980459.772, "dur": 21.269, + "args": { + "External id": 294490,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980463.618, "dur": 1.514, + "args": { + "External id": 294491,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980466.407, "dur": 2.350, + "args": { + "External id": 294492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980469.528, "dur": 0.411, + "args": { + "External id": 294493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980470.505, "dur": 0.242, + "args": { + "External id": 294494,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980472.587, "dur": 0.501, + "args": { + "External id": 294495,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980473.623, "dur": 0.260, + "args": { + "External id": 294496,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980474.393, "dur": 0.419, + "args": { + "External id": 294497,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980476.430, "dur": 0.317, + "args": { + "External id": 294498,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980477.310, "dur": 0.370, + "args": { + "External id": 294499,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367980490.011, "dur": 31.096, + "args": { + "External id": 294500,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333367980550.455, "dur": 144.109, + "args": { + "External id": 294501,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367980559.351, "dur": 4.672, + "args": { + "External id": 294502,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333367980568.484, "dur": 9.086, + "args": { + "External id": 294503,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333367980572.362, "dur": 4.786, + "args": { + "External id": 294504,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980575.260, "dur": 0.616, + "args": { + "External id": 294505,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333367980586.491, "dur": 23.505, + "args": { + "External id": 294506,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980587.993, "dur": 0.324, + "args": { + "External id": 294507,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980590.450, "dur": 0.359, + "args": { + "External id": 294508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980592.713, "dur": 0.474, + "args": { + "External id": 294509,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980594.628, "dur": 0.647, + "args": { + "External id": 294510,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980596.802, "dur": 0.239, + "args": { + "External id": 294511,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980598.528, "dur": 0.367, + "args": { + "External id": 294512,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980601.018, "dur": 2.030, + "args": { + "External id": 294513,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980604.404, "dur": 0.334, + "args": { + "External id": 294514,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367980606.548, "dur": 0.400, + "args": { + "External id": 294515,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367980661.946, "dur": 23.776, + "args": { + "External id": 294516,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333367980740.999, "dur": 114.214, + "args": { + "External id": 294517,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367980765.355, "dur": 86.506, + "args": { + "External id": 294518,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1141, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333367980775.543, "dur": 72.075, + "args": { + "External id": 294519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333367980869.657, "dur": 1.841, + "args": { + "External id": 294520,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1143, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367980946.025, "dur": 1598.589, + "args": { + "External id": 294521,"Sequence number": 1209210, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1144 + } + }, + { + "ph": "f", "id": 22, "pid": 2070554, "tid": 2107619, "ts": 5333367980946.025, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981049.572, "dur": 103.329, + "args": { + "External id": 294522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367981210.816, "dur": 41.347, + "args": { + "External id": 294523,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981270.881, "dur": 53.758, + "args": { + "External id": 294524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981333.660, "dur": 33.939, + "args": { + "External id": 294525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981373.833, "dur": 45.381, + "args": { + "External id": 294526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981427.233, "dur": 27.988, + "args": { + "External id": 294527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981464.328, "dur": 42.104, + "args": { + "External id": 294528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367981531.294, "dur": 22.472, + "args": { + "External id": 294529,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333367981576.902, "dur": 28.073, + "args": { + "External id": 294530,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367981660.752, "dur": 23.943, + "args": { + "External id": 294531,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367981700.000, "dur": 15.636, + "args": { + "External id": 294532,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981724.953, "dur": 39.274, + "args": { + "External id": 294533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367981767.638, "dur": 33.349, + "args": { + "External id": 294534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333367981827.559, "dur": 166.368, + "args": { + "External id": 294535,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367981904.301, "dur": 5.151, + "args": { + "External id": 294536,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367981911.266, "dur": 2.914, + "args": { + "External id": 294537,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367982024.558, "dur": 24.669, + "args": { + "External id": 294538,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333367982063.316, "dur": 15.953, + "args": { + "External id": 294539,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367982087.036, "dur": 34.658, + "args": { + "External id": 294540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367982126.515, "dur": 33.604, + "args": { + "External id": 294541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367982185.287, "dur": 27.530, + "args": { + "External id": 294542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367982220.001, "dur": 33.013, + "args": { + "External id": 294543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367982258.661, "dur": 21.602, + "args": { + "External id": 294544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333367982285.289, "dur": 29.474, + "args": { + "External id": 294545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333367982335.073, "dur": 37.579, + "args": { + "External id": 294546,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367982396.352, "dur": 26.216, + "args": { + "External id": 294547,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333367982435.713, "dur": 18.056, + "args": { + "External id": 294548,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333367982471.802, "dur": 13.993, + "args": { + "External id": 294549,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333367982496.738, "dur": 15.693, + "args": { + "External id": 294550,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982586.430, "dur": 14.021, + "args": { + "External id": 294551,"Record function id": 0, "Ev Idx": 1174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982589.629, "dur": 9.821, + "args": { + "External id": 294552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982593.570, "dur": 5.048, + "args": { + "External id": 294553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982594.771, "dur": 3.729, + "args": { + "External id": 294554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982604.156, "dur": 4.596, + "args": { + "External id": 294555,"Record function id": 0, "Ev Idx": 1178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982605.762, "dur": 2.547, + "args": { + "External id": 294556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982606.536, "dur": 1.350, + "args": { + "External id": 294557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982606.921, "dur": 0.882, + "args": { + "External id": 294558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982611.976, "dur": 4.073, + "args": { + "External id": 294559,"Record function id": 0, "Ev Idx": 1182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982613.289, "dur": 2.314, + "args": { + "External id": 294560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982613.976, "dur": 1.206, + "args": { + "External id": 294561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982614.458, "dur": 0.641, + "args": { + "External id": 294562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982655.715, "dur": 6.481, + "args": { + "External id": 294563,"Record function id": 0, "Ev Idx": 1186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982657.819, "dur": 3.692, + "args": { + "External id": 294564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982658.883, "dur": 1.758, + "args": { + "External id": 294565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982659.316, "dur": 1.117, + "args": { + "External id": 294566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982666.312, "dur": 4.272, + "args": { + "External id": 294567,"Record function id": 0, "Ev Idx": 1190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982667.578, "dur": 2.537, + "args": { + "External id": 294568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982668.300, "dur": 1.395, + "args": { + "External id": 294569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982668.835, "dur": 0.756, + "args": { + "External id": 294570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982673.619, "dur": 3.977, + "args": { + "External id": 294571,"Record function id": 0, "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982674.853, "dur": 2.321, + "args": { + "External id": 294572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982675.366, "dur": 1.392, + "args": { + "External id": 294573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982676.012, "dur": 0.637, + "args": { + "External id": 294574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982680.760, "dur": 7.659, + "args": { + "External id": 294575,"Record function id": 0, "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982682.089, "dur": 5.848, + "args": { + "External id": 294576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982684.374, "dur": 3.145, + "args": { + "External id": 294577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982685.039, "dur": 2.406, + "args": { + "External id": 294578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982692.355, "dur": 4.064, + "args": { + "External id": 294579,"Record function id": 0, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982693.591, "dur": 2.404, + "args": { + "External id": 294580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982694.097, "dur": 1.477, + "args": { + "External id": 294581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982694.702, "dur": 0.768, + "args": { + "External id": 294582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982699.502, "dur": 4.213, + "args": { + "External id": 294583,"Record function id": 0, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333367982700.832, "dur": 2.457, + "args": { + "External id": 294584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982701.503, "dur": 1.374, + "args": { + "External id": 294585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333367982702.226, "dur": 0.544, + "args": { + "External id": 294586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367982707.420, "dur": 35673.325, + "args": { + "External id": 294587,"Record function id": 0, "Sequence number": 1209209, "Fwd thread id": 1, "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333367982708.513, "dur": 35663.658, + "args": { + "External id": 294588,"Sequence number": 1209209, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1211 + } + }, + { + "ph": "f", "id": 23, "pid": 2070554, "tid": 2107619, "ts": 5333367982708.513, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2070554, "tid": 2107619, + "ts": 5333367982741.221, "dur": 38.675, + "args": { + "External id": 294589,"Record function id": 0, "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2070554, "tid": 2107619, + "ts": 5333367982787.452, "dur": 62.084, + "args": { + "External id": 294590,"Record function id": 0, "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2070554, "tid": 2107619, + "ts": 5333367982855.182, "dur": 35509.408, + "args": { + "External id": 294591,"Record function id": 0, "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367982940.054, "dur": 7.161, + "args": { + "External id": 294592,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333367982955.887, "dur": 4.987, + "args": { + "External id": 294593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367982974.838, "dur": 34585.998, + "args": { + "External id": 294594,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333367982987.589, "dur": 34564.394, + "args": { + "External id": 294595,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333367983022.819, "dur": 13.991, + "args": { + "External id": 294596,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333367983043.053, "dur": 34466.487, + "args": { + "External id": 294597,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333367983045.665, "dur": 34463.231, + "args": { + "External id": 294598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333367983049.844, "dur": 5.778, + "args": { + "External id": 294599,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333367983057.409, "dur": 34447.712, + "args": { + "External id": 294600,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368017675.322, "dur": 9.637, + "args": { + "External id": 294601,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368017678.221, "dur": 6.053, + "args": { + "External id": 294602,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368017715.200, "dur": 348.477, + "args": { + "External id": 294603,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368017741.828, "dur": 317.002, + "args": { + "External id": 294604,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1227, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368017753.281, "dur": 300.342, + "args": { + "External id": 294605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368018081.620, "dur": 2.318, + "args": { + "External id": 294606,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1229, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018142.664, "dur": 7.571, + "args": { + "External id": 294607,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018214.774, "dur": 2.120, + "args": { + "External id": 294608,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018232.330, "dur": 1.145, + "args": { + "External id": 294609,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018244.674, "dur": 0.627, + "args": { + "External id": 294610,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018257.355, "dur": 2.788, + "args": { + "External id": 294611,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018270.282, "dur": 0.793, + "args": { + "External id": 294612,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018281.171, "dur": 1.067, + "args": { + "External id": 294613,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018291.893, "dur": 2.174, + "args": { + "External id": 294614,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018303.780, "dur": 2.558, + "args": { + "External id": 294615,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368018395.497, "dur": 2635.214, + "args": { + "External id": 294616,"Record function id": 0, "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2070554, "tid": 2107619, + "ts": 5333368018413.348, "dur": 984.718, + "args": { + "External id": 294617,"Record function id": 0, "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070554, "tid": 2107619, + "ts": 5333368018427.983, "dur": 341.886, + "args": { + "External id": 294618,"Record function id": 0, "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018502.196, "dur": 4.034, + "args": { + "External id": 294619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018509.134, "dur": 0.982, + "args": { + "External id": 294620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018512.023, "dur": 0.649, + "args": { + "External id": 294621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018514.527, "dur": 0.930, + "args": { + "External id": 294622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018517.013, "dur": 0.505, + "args": { + "External id": 294623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018519.021, "dur": 0.565, + "args": { + "External id": 294624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018521.042, "dur": 3.064, + "args": { + "External id": 294625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018525.593, "dur": 0.660, + "args": { + "External id": 294626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018527.821, "dur": 0.768, + "args": { + "External id": 294627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368018530.180, "dur": 0.625, + "args": { + "External id": 294628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368018548.743, "dur": 187.936, + "args": { + "External id": 294629,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368018564.098, "dur": 167.727, + "args": { + "External id": 294630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368018578.856, "dur": 12.595, + "args": { + "External id": 294631,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368018594.907, "dur": 107.107, + "args": { + "External id": 294632,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368018597.307, "dur": 104.296, + "args": { + "External id": 294633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018602.516, "dur": 6.250, + "args": { + "External id": 294634,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368018610.390, "dur": 90.184, + "args": { + "External id": 294635,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2070554, "tid": 2107619, + "ts": 5333368018847.163, "dur": 543.438, + "args": { + "External id": 294636,"Record function id": 0, "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070554, "tid": 2107619, + "ts": 5333368018863.519, "dur": 514.967, + "args": { + "External id": 294637,"Record function id": 0, "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368018921.055, "dur": 5.440, + "args": { + "External id": 294638,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368018941.106, "dur": 28.534, + "args": { + "External id": 294639,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018945.806, "dur": 1.343, + "args": { + "External id": 294640,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018949.324, "dur": 3.328, + "args": { + "External id": 294641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018954.386, "dur": 0.406, + "args": { + "External id": 294642,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018956.102, "dur": 0.480, + "args": { + "External id": 294643,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018958.750, "dur": 0.362, + "args": { + "External id": 294644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018960.304, "dur": 0.330, + "args": { + "External id": 294645,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018961.804, "dur": 0.354, + "args": { + "External id": 294646,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018963.868, "dur": 0.389, + "args": { + "External id": 294647,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368018965.690, "dur": 0.323, + "args": { + "External id": 294648,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368018979.383, "dur": 29.483, + "args": { + "External id": 294649,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368019038.906, "dur": 96.470, + "args": { + "External id": 294650,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368019048.796, "dur": 4.595, + "args": { + "External id": 294651,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368019058.000, "dur": 9.585, + "args": { + "External id": 294652,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368019062.158, "dur": 4.998, + "args": { + "External id": 294653,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019065.529, "dur": 0.461, + "args": { + "External id": 294654,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368019074.325, "dur": 24.433, + "args": { + "External id": 294655,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019076.308, "dur": 0.553, + "args": { + "External id": 294656,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019078.825, "dur": 0.460, + "args": { + "External id": 294657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019080.683, "dur": 0.338, + "args": { + "External id": 294658,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019082.315, "dur": 1.134, + "args": { + "External id": 294659,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019084.989, "dur": 0.436, + "args": { + "External id": 294660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019086.889, "dur": 0.226, + "args": { + "External id": 294661,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019089.662, "dur": 2.004, + "args": { + "External id": 294662,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019092.942, "dur": 0.318, + "args": { + "External id": 294663,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368019094.616, "dur": 0.426, + "args": { + "External id": 294664,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368019108.194, "dur": 20.072, + "args": { + "External id": 294665,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368019196.491, "dur": 119.354, + "args": { + "External id": 294666,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368019222.815, "dur": 89.551, + "args": { + "External id": 294667,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1290, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368019232.696, "dur": 75.312, + "args": { + "External id": 294668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368019329.314, "dur": 2.021, + "args": { + "External id": 294669,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1292, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368019404.391, "dur": 1602.694, + "args": { + "External id": 294670,"Sequence number": 1209208, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1293 + } + }, + { + "ph": "f", "id": 24, "pid": 2070554, "tid": 2107619, "ts": 5333368019404.391, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368019508.874, "dur": 104.664, + "args": { + "External id": 294671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368019694.142, "dur": 39.444, + "args": { + "External id": 294672,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368019751.604, "dur": 57.490, + "args": { + "External id": 294673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368019818.679, "dur": 38.367, + "args": { + "External id": 294674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368019863.129, "dur": 44.579, + "args": { + "External id": 294675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368019914.681, "dur": 27.079, + "args": { + "External id": 294676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368019951.212, "dur": 42.150, + "args": { + "External id": 294677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368020013.811, "dur": 22.522, + "args": { + "External id": 294678,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368020052.162, "dur": 27.219, + "args": { + "External id": 294679,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368020097.371, "dur": 18.578, + "args": { + "External id": 294680,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368020127.240, "dur": 15.406, + "args": { + "External id": 294681,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020151.112, "dur": 47.459, + "args": { + "External id": 294682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020203.711, "dur": 38.991, + "args": { + "External id": 294683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368020268.944, "dur": 173.929, + "args": { + "External id": 294684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368020346.851, "dur": 6.086, + "args": { + "External id": 294685,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368020355.007, "dur": 2.765, + "args": { + "External id": 294686,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368020472.932, "dur": 24.414, + "args": { + "External id": 294687,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368020508.974, "dur": 15.017, + "args": { + "External id": 294688,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020531.278, "dur": 36.036, + "args": { + "External id": 294689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020572.319, "dur": 34.815, + "args": { + "External id": 294690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020613.398, "dur": 60.089, + "args": { + "External id": 294691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020683.077, "dur": 33.994, + "args": { + "External id": 294692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020723.684, "dur": 21.085, + "args": { + "External id": 294693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368020750.803, "dur": 29.954, + "args": { + "External id": 294694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368020800.592, "dur": 22.814, + "args": { + "External id": 294695,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368020850.009, "dur": 32.053, + "args": { + "External id": 294696,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368020900.452, "dur": 16.760, + "args": { + "External id": 294697,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368020932.820, "dur": 14.160, + "args": { + "External id": 294698,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368020963.245, "dur": 15.435, + "args": { + "External id": 294699,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021052.523, "dur": 14.434, + "args": { + "External id": 294700,"Record function id": 0, "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021056.054, "dur": 9.967, + "args": { + "External id": 294701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021060.014, "dur": 5.178, + "args": { + "External id": 294702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021061.424, "dur": 3.668, + "args": { + "External id": 294703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021070.678, "dur": 8.104, + "args": { + "External id": 294704,"Record function id": 0, "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021072.148, "dur": 6.170, + "args": { + "External id": 294705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021076.218, "dur": 1.676, + "args": { + "External id": 294706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021076.864, "dur": 0.921, + "args": { + "External id": 294707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021082.087, "dur": 4.250, + "args": { + "External id": 294708,"Record function id": 0, "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021083.544, "dur": 2.314, + "args": { + "External id": 294709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021084.254, "dur": 1.183, + "args": { + "External id": 294710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021084.719, "dur": 0.638, + "args": { + "External id": 294711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021089.518, "dur": 3.917, + "args": { + "External id": 294712,"Record function id": 0, "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021090.868, "dur": 2.135, + "args": { + "External id": 294713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021091.557, "dur": 1.020, + "args": { + "External id": 294714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021091.877, "dur": 0.625, + "args": { + "External id": 294715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021096.641, "dur": 4.251, + "args": { + "External id": 294716,"Record function id": 0, "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021097.934, "dur": 2.493, + "args": { + "External id": 294717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021098.692, "dur": 1.309, + "args": { + "External id": 294718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021099.341, "dur": 0.569, + "args": { + "External id": 294719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021104.070, "dur": 4.179, + "args": { + "External id": 294720,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021105.394, "dur": 2.409, + "args": { + "External id": 294721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021105.874, "dur": 1.533, + "args": { + "External id": 294722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021106.697, "dur": 0.639, + "args": { + "External id": 294723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021111.658, "dur": 5.646, + "args": { + "External id": 294724,"Record function id": 0, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021112.819, "dur": 4.055, + "args": { + "External id": 294725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021113.542, "dur": 2.925, + "args": { + "External id": 294726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021114.165, "dur": 2.235, + "args": { + "External id": 294727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021120.435, "dur": 4.190, + "args": { + "External id": 294728,"Record function id": 0, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021121.829, "dur": 2.348, + "args": { + "External id": 294729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021122.307, "dur": 1.476, + "args": { + "External id": 294730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021122.828, "dur": 0.884, + "args": { + "External id": 294731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021127.741, "dur": 3.818, + "args": { + "External id": 294732,"Record function id": 0, "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368021129.044, "dur": 2.063, + "args": { + "External id": 294733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021129.519, "dur": 1.190, + "args": { + "External id": 294734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368021130.036, "dur": 0.602, + "args": { + "External id": 294735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368021138.259, "dur": 38081.910, + "args": { + "External id": 294736,"Record function id": 0, "Sequence number": 1209207, "Fwd thread id": 1, "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368021139.964, "dur": 38071.140, + "args": { + "External id": 294737,"Sequence number": 1209207, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1360 + } + }, + { + "ph": "f", "id": 25, "pid": 2070554, "tid": 2107619, "ts": 5333368021139.964, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2070554, "tid": 2107619, + "ts": 5333368021184.871, "dur": 40.590, + "args": { + "External id": 294738,"Record function id": 0, "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2070554, "tid": 2107619, + "ts": 5333368021234.227, "dur": 62.994, + "args": { + "External id": 294739,"Record function id": 0, "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2070554, "tid": 2107619, + "ts": 5333368021303.597, "dur": 37898.372, + "args": { + "External id": 294740,"Record function id": 0, "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368021391.405, "dur": 6.854, + "args": { + "External id": 294741,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368021408.920, "dur": 4.847, + "args": { + "External id": 294742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368021428.131, "dur": 36913.514, + "args": { + "External id": 294743,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368021441.374, "dur": 36891.980, + "args": { + "External id": 294744,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368021480.491, "dur": 14.091, + "args": { + "External id": 294745,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368021501.046, "dur": 36794.369, + "args": { + "External id": 294746,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368021503.280, "dur": 36791.446, + "args": { + "External id": 294747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368021507.336, "dur": 5.852, + "args": { + "External id": 294748,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368021515.012, "dur": 36776.505, + "args": { + "External id": 294749,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368058424.342, "dur": 8.757, + "args": { + "External id": 294750,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368058427.050, "dur": 5.679, + "args": { + "External id": 294751,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368058461.894, "dur": 420.687, + "args": { + "External id": 294752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368058489.226, "dur": 387.994, + "args": { + "External id": 294753,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1376, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368058501.195, "dur": 368.687, + "args": { + "External id": 294754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368058908.017, "dur": 2.751, + "args": { + "External id": 294755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1378, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368058977.097, "dur": 7.798, + "args": { + "External id": 294756,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059031.427, "dur": 1.041, + "args": { + "External id": 294757,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059046.432, "dur": 1.162, + "args": { + "External id": 294758,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059058.373, "dur": 0.835, + "args": { + "External id": 294759,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059072.762, "dur": 2.441, + "args": { + "External id": 294760,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059085.105, "dur": 0.859, + "args": { + "External id": 294761,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059095.195, "dur": 0.611, + "args": { + "External id": 294762,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059105.535, "dur": 1.649, + "args": { + "External id": 294763,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059116.406, "dur": 2.242, + "args": { + "External id": 294764,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368059235.175, "dur": 2652.337, + "args": { + "External id": 294765,"Record function id": 0, "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2070554, "tid": 2107619, + "ts": 5333368059255.838, "dur": 985.311, + "args": { + "External id": 294766,"Record function id": 0, "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070554, "tid": 2107619, + "ts": 5333368059270.080, "dur": 302.056, + "args": { + "External id": 294767,"Record function id": 0, "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059349.137, "dur": 4.478, + "args": { + "External id": 294768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059356.770, "dur": 1.235, + "args": { + "External id": 294769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059359.799, "dur": 0.858, + "args": { + "External id": 294770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059362.498, "dur": 0.589, + "args": { + "External id": 294771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059364.770, "dur": 0.775, + "args": { + "External id": 294772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059367.090, "dur": 0.901, + "args": { + "External id": 294773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059369.822, "dur": 3.478, + "args": { + "External id": 294774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059375.008, "dur": 0.567, + "args": { + "External id": 294775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059377.257, "dur": 0.823, + "args": { + "External id": 294776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368059379.719, "dur": 0.705, + "args": { + "External id": 294777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368059398.479, "dur": 147.471, + "args": { + "External id": 294778,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368059413.318, "dur": 128.809, + "args": { + "External id": 294779,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368059429.608, "dur": 11.791, + "args": { + "External id": 294780,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368059444.997, "dur": 68.552, + "args": { + "External id": 294781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368059449.156, "dur": 64.142, + "args": { + "External id": 294782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059452.597, "dur": 5.947, + "args": { + "External id": 294783,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368059460.225, "dur": 52.390, + "args": { + "External id": 294784,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2070554, "tid": 2107619, + "ts": 5333368059685.812, "dur": 548.412, + "args": { + "External id": 294785,"Record function id": 0, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070554, "tid": 2107619, + "ts": 5333368059702.329, "dur": 519.179, + "args": { + "External id": 294786,"Record function id": 0, "Ev Idx": 1409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368059757.416, "dur": 9.130, + "args": { + "External id": 294787,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368059781.767, "dur": 28.646, + "args": { + "External id": 294788,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059786.713, "dur": 1.757, + "args": { + "External id": 294789,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059791.147, "dur": 2.069, + "args": { + "External id": 294790,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059794.501, "dur": 0.197, + "args": { + "External id": 294791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059795.655, "dur": 0.415, + "args": { + "External id": 294792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059798.931, "dur": 0.759, + "args": { + "External id": 294793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059800.931, "dur": 0.471, + "args": { + "External id": 294794,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059802.203, "dur": 1.333, + "args": { + "External id": 294795,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059804.727, "dur": 0.355, + "args": { + "External id": 294796,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059806.289, "dur": 0.260, + "args": { + "External id": 294797,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368059824.673, "dur": 32.291, + "args": { + "External id": 294798,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368059890.432, "dur": 99.416, + "args": { + "External id": 294799,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368059899.952, "dur": 5.147, + "args": { + "External id": 294800,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368059910.198, "dur": 9.398, + "args": { + "External id": 294801,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368059914.382, "dur": 4.729, + "args": { + "External id": 294802,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059917.375, "dur": 0.522, + "args": { + "External id": 294803,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368059925.726, "dur": 24.964, + "args": { + "External id": 294804,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059927.542, "dur": 0.588, + "args": { + "External id": 294805,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059929.423, "dur": 1.909, + "args": { + "External id": 294806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059932.210, "dur": 0.289, + "args": { + "External id": 294807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059933.892, "dur": 0.355, + "args": { + "External id": 294808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059936.456, "dur": 0.309, + "args": { + "External id": 294809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059940.588, "dur": 0.282, + "args": { + "External id": 294810,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059941.800, "dur": 2.268, + "args": { + "External id": 294811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059946.037, "dur": 0.389, + "args": { + "External id": 294812,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368059947.274, "dur": 0.502, + "args": { + "External id": 294813,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368059961.889, "dur": 20.500, + "args": { + "External id": 294814,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368060031.607, "dur": 107.240, + "args": { + "External id": 294815,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368060051.196, "dur": 84.405, + "args": { + "External id": 294816,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1439, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368060060.409, "dur": 71.055, + "args": { + "External id": 294817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368060149.869, "dur": 1.817, + "args": { + "External id": 294818,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1441, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368060247.960, "dur": 1619.433, + "args": { + "External id": 294819,"Sequence number": 1209206, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1442 + } + }, + { + "ph": "f", "id": 26, "pid": 2070554, "tid": 2107619, "ts": 5333368060247.960, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368060350.506, "dur": 106.097, + "args": { + "External id": 294820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368060494.047, "dur": 37.471, + "args": { + "External id": 294821,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368060547.878, "dur": 49.828, + "args": { + "External id": 294822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368060606.829, "dur": 73.862, + "args": { + "External id": 294823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368060692.189, "dur": 49.721, + "args": { + "External id": 294824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368060749.954, "dur": 30.351, + "args": { + "External id": 294825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368060788.978, "dur": 41.712, + "args": { + "External id": 294826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368060853.532, "dur": 24.716, + "args": { + "External id": 294827,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368060896.026, "dur": 29.886, + "args": { + "External id": 294828,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368060944.216, "dur": 20.215, + "args": { + "External id": 294829,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368060974.701, "dur": 16.366, + "args": { + "External id": 294830,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368060999.823, "dur": 31.175, + "args": { + "External id": 294831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368061033.888, "dur": 32.911, + "args": { + "External id": 294832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368061093.027, "dur": 185.383, + "args": { + "External id": 294833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368061184.480, "dur": 6.384, + "args": { + "External id": 294834,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368061192.962, "dur": 3.102, + "args": { + "External id": 294835,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368061310.687, "dur": 25.694, + "args": { + "External id": 294836,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368061349.995, "dur": 15.360, + "args": { + "External id": 294837,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368061376.340, "dur": 42.286, + "args": { + "External id": 294838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368061424.268, "dur": 38.166, + "args": { + "External id": 294839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368061472.190, "dur": 21.997, + "args": { + "External id": 294840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368061500.481, "dur": 33.135, + "args": { + "External id": 294841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368061539.652, "dur": 22.266, + "args": { + "External id": 294842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368061567.617, "dur": 32.508, + "args": { + "External id": 294843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368061652.477, "dur": 41.941, + "args": { + "External id": 294844,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368061721.616, "dur": 23.396, + "args": { + "External id": 294845,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368061760.345, "dur": 19.413, + "args": { + "External id": 294846,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368061795.875, "dur": 12.924, + "args": { + "External id": 294847,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368061821.196, "dur": 18.182, + "args": { + "External id": 294848,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061908.384, "dur": 14.403, + "args": { + "External id": 294849,"Record function id": 0, "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061911.502, "dur": 10.388, + "args": { + "External id": 294850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061915.709, "dur": 5.325, + "args": { + "External id": 294851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061917.065, "dur": 3.812, + "args": { + "External id": 294852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061926.643, "dur": 4.473, + "args": { + "External id": 294853,"Record function id": 0, "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061927.956, "dur": 2.697, + "args": { + "External id": 294854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061928.729, "dur": 1.435, + "args": { + "External id": 294855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061929.264, "dur": 0.821, + "args": { + "External id": 294856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061934.496, "dur": 4.033, + "args": { + "External id": 294857,"Record function id": 0, "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061935.781, "dur": 2.307, + "args": { + "External id": 294858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061936.450, "dur": 1.205, + "args": { + "External id": 294859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061937.014, "dur": 0.529, + "args": { + "External id": 294860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061941.628, "dur": 4.435, + "args": { + "External id": 294861,"Record function id": 0, "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061943.382, "dur": 2.239, + "args": { + "External id": 294862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061943.908, "dur": 1.301, + "args": { + "External id": 294863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061944.396, "dur": 0.707, + "args": { + "External id": 294864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061949.353, "dur": 3.907, + "args": { + "External id": 294865,"Record function id": 0, "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061950.315, "dur": 2.478, + "args": { + "External id": 294866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061951.039, "dur": 1.345, + "args": { + "External id": 294867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061951.743, "dur": 0.545, + "args": { + "External id": 294868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061956.350, "dur": 4.229, + "args": { + "External id": 294869,"Record function id": 0, "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061957.544, "dur": 2.577, + "args": { + "External id": 294870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061958.022, "dur": 1.692, + "args": { + "External id": 294871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061959.008, "dur": 0.606, + "args": { + "External id": 294872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061964.014, "dur": 5.866, + "args": { + "External id": 294873,"Record function id": 0, "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061964.894, "dur": 4.480, + "args": { + "External id": 294874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061965.746, "dur": 3.227, + "args": { + "External id": 294875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061966.332, "dur": 2.558, + "args": { + "External id": 294876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061973.134, "dur": 3.969, + "args": { + "External id": 294877,"Record function id": 0, "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061974.282, "dur": 2.393, + "args": { + "External id": 294878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061974.762, "dur": 1.509, + "args": { + "External id": 294879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061975.533, "dur": 0.643, + "args": { + "External id": 294880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061980.354, "dur": 4.322, + "args": { + "External id": 294881,"Record function id": 0, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368061981.304, "dur": 2.926, + "args": { + "External id": 294882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061982.210, "dur": 1.611, + "args": { + "External id": 294883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368061983.184, "dur": 0.542, + "args": { + "External id": 294884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368061988.229, "dur": 35843.672, + "args": { + "External id": 294885,"Record function id": 0, "Sequence number": 1209205, "Fwd thread id": 1, "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368061989.533, "dur": 35833.517, + "args": { + "External id": 294886,"Sequence number": 1209205, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1509 + } + }, + { + "ph": "f", "id": 27, "pid": 2070554, "tid": 2107619, "ts": 5333368061989.533, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2070554, "tid": 2107619, + "ts": 5333368062017.852, "dur": 37.890, + "args": { + "External id": 294887,"Record function id": 0, "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2070554, "tid": 2107619, + "ts": 5333368062063.227, "dur": 58.629, + "args": { + "External id": 294888,"Record function id": 0, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2070554, "tid": 2107619, + "ts": 5333368062127.536, "dur": 35686.954, + "args": { + "External id": 294889,"Record function id": 0, "Ev Idx": 1512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368062229.583, "dur": 7.789, + "args": { + "External id": 294890,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368062247.646, "dur": 4.915, + "args": { + "External id": 294891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368062267.666, "dur": 34700.752, + "args": { + "External id": 294892,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368062281.026, "dur": 34679.089, + "args": { + "External id": 294893,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368062318.687, "dur": 13.767, + "args": { + "External id": 294894,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368062338.363, "dur": 34584.474, + "args": { + "External id": 294895,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368062340.880, "dur": 34581.339, + "args": { + "External id": 294896,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368062344.862, "dur": 5.707, + "args": { + "External id": 294897,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368062352.207, "dur": 34565.901, + "args": { + "External id": 294898,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368097052.611, "dur": 8.733, + "args": { + "External id": 294899,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368097055.400, "dur": 5.569, + "args": { + "External id": 294900,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368097088.049, "dur": 392.805, + "args": { + "External id": 294901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368097116.724, "dur": 359.101, + "args": { + "External id": 294902,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1525, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368097129.021, "dur": 341.486, + "args": { + "External id": 294903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368097506.203, "dur": 2.382, + "args": { + "External id": 294904,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1527, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097567.585, "dur": 7.831, + "args": { + "External id": 294905,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097616.880, "dur": 1.031, + "args": { + "External id": 294906,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097669.702, "dur": 1.822, + "args": { + "External id": 294907,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097682.676, "dur": 1.103, + "args": { + "External id": 294908,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097694.895, "dur": 2.300, + "args": { + "External id": 294909,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097707.534, "dur": 0.738, + "args": { + "External id": 294910,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097717.856, "dur": 0.796, + "args": { + "External id": 294911,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097728.176, "dur": 1.816, + "args": { + "External id": 294912,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368097751.472, "dur": 2.115, + "args": { + "External id": 294913,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368097846.920, "dur": 2663.145, + "args": { + "External id": 294914,"Record function id": 0, "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2070554, "tid": 2107619, + "ts": 5333368097867.075, "dur": 989.444, + "args": { + "External id": 294915,"Record function id": 0, "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070554, "tid": 2107619, + "ts": 5333368097881.253, "dur": 316.589, + "args": { + "External id": 294916,"Record function id": 0, "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097956.252, "dur": 4.123, + "args": { + "External id": 294917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097963.591, "dur": 0.901, + "args": { + "External id": 294918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097966.153, "dur": 0.704, + "args": { + "External id": 294919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097968.585, "dur": 0.622, + "args": { + "External id": 294920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097971.364, "dur": 0.930, + "args": { + "External id": 294921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097973.951, "dur": 0.541, + "args": { + "External id": 294922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097976.273, "dur": 3.037, + "args": { + "External id": 294923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097980.819, "dur": 0.635, + "args": { + "External id": 294924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097983.138, "dur": 0.849, + "args": { + "External id": 294925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368097985.696, "dur": 0.700, + "args": { + "External id": 294926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368098005.218, "dur": 147.938, + "args": { + "External id": 294927,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368098022.293, "dur": 126.817, + "args": { + "External id": 294928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368098037.729, "dur": 12.128, + "args": { + "External id": 294929,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368098053.373, "dur": 67.934, + "args": { + "External id": 294930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368098055.690, "dur": 65.256, + "args": { + "External id": 294931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098058.945, "dur": 6.473, + "args": { + "External id": 294932,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368098066.998, "dur": 53.217, + "args": { + "External id": 294933,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2070554, "tid": 2107619, + "ts": 5333368098274.942, "dur": 574.634, + "args": { + "External id": 294934,"Record function id": 0, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070554, "tid": 2107619, + "ts": 5333368098290.227, "dur": 547.308, + "args": { + "External id": 294935,"Record function id": 0, "Ev Idx": 1558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368098345.551, "dur": 5.360, + "args": { + "External id": 294936,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368098366.716, "dur": 24.319, + "args": { + "External id": 294937,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098371.111, "dur": 1.427, + "args": { + "External id": 294938,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098374.186, "dur": 2.438, + "args": { + "External id": 294939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098377.697, "dur": 0.291, + "args": { + "External id": 294940,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098379.367, "dur": 0.257, + "args": { + "External id": 294941,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098381.557, "dur": 0.259, + "args": { + "External id": 294942,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098382.881, "dur": 0.298, + "args": { + "External id": 294943,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098384.210, "dur": 0.264, + "args": { + "External id": 294944,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098386.184, "dur": 0.263, + "args": { + "External id": 294945,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098387.232, "dur": 0.168, + "args": { + "External id": 294946,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368098400.253, "dur": 33.764, + "args": { + "External id": 294947,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368098463.936, "dur": 99.756, + "args": { + "External id": 294948,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368098477.149, "dur": 4.521, + "args": { + "External id": 294949,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368098486.543, "dur": 9.278, + "args": { + "External id": 294950,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368098490.437, "dur": 4.939, + "args": { + "External id": 294951,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098493.543, "dur": 0.672, + "args": { + "External id": 294952,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368098502.655, "dur": 20.439, + "args": { + "External id": 294953,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098504.105, "dur": 0.304, + "args": { + "External id": 294954,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098506.185, "dur": 0.443, + "args": { + "External id": 294955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098507.582, "dur": 0.463, + "args": { + "External id": 294956,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098509.168, "dur": 1.241, + "args": { + "External id": 294957,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098511.180, "dur": 0.163, + "args": { + "External id": 294958,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098512.463, "dur": 0.308, + "args": { + "External id": 294959,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098515.085, "dur": 1.754, + "args": { + "External id": 294960,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098518.349, "dur": 0.379, + "args": { + "External id": 294961,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368098519.707, "dur": 0.330, + "args": { + "External id": 294962,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368098532.012, "dur": 24.538, + "args": { + "External id": 294963,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368098605.220, "dur": 162.421, + "args": { + "External id": 294964,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368098669.445, "dur": 94.454, + "args": { + "External id": 294965,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1588, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368098683.108, "dur": 75.994, + "args": { + "External id": 294966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368098783.253, "dur": 1.819, + "args": { + "External id": 294967,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1590, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368098866.438, "dur": 1623.548, + "args": { + "External id": 294968,"Sequence number": 1209204, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1591 + } + }, + { + "ph": "f", "id": 28, "pid": 2070554, "tid": 2107619, "ts": 5333368098866.438, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368098975.457, "dur": 107.877, + "args": { + "External id": 294969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368099125.301, "dur": 40.033, + "args": { + "External id": 294970,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368099202.317, "dur": 56.569, + "args": { + "External id": 294971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368099268.584, "dur": 34.026, + "args": { + "External id": 294972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368099309.631, "dur": 45.953, + "args": { + "External id": 294973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368099363.661, "dur": 27.735, + "args": { + "External id": 294974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368099400.497, "dur": 42.697, + "args": { + "External id": 294975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368099468.163, "dur": 23.036, + "args": { + "External id": 294976,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368099512.988, "dur": 27.388, + "args": { + "External id": 294977,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368099560.371, "dur": 19.527, + "args": { + "External id": 294978,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368099593.963, "dur": 15.080, + "args": { + "External id": 294979,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368099617.342, "dur": 73.393, + "args": { + "External id": 294980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368099696.590, "dur": 35.251, + "args": { + "External id": 294981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368099765.383, "dur": 167.544, + "args": { + "External id": 294982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368099843.582, "dur": 6.175, + "args": { + "External id": 294983,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368099851.359, "dur": 3.409, + "args": { + "External id": 294984,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368099968.293, "dur": 25.826, + "args": { + "External id": 294985,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368100006.870, "dur": 13.756, + "args": { + "External id": 294986,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368100028.635, "dur": 33.659, + "args": { + "External id": 294987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368100067.521, "dur": 33.992, + "args": { + "External id": 294988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368100109.847, "dur": 23.118, + "args": { + "External id": 294989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368100138.575, "dur": 46.480, + "args": { + "External id": 294990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368100194.568, "dur": 25.992, + "args": { + "External id": 294991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368100226.118, "dur": 42.871, + "args": { + "External id": 294992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368100298.555, "dur": 29.259, + "args": { + "External id": 294993,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368100344.151, "dur": 25.392, + "args": { + "External id": 294994,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368100387.248, "dur": 16.101, + "args": { + "External id": 294995,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368100418.997, "dur": 13.215, + "args": { + "External id": 294996,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368100447.253, "dur": 14.333, + "args": { + "External id": 294997,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100531.802, "dur": 14.055, + "args": { + "External id": 294998,"Record function id": 0, "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100534.995, "dur": 9.884, + "args": { + "External id": 294999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100538.839, "dur": 5.089, + "args": { + "External id": 295000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100540.120, "dur": 3.715, + "args": { + "External id": 295001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100549.736, "dur": 3.813, + "args": { + "External id": 295002,"Record function id": 0, "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100550.850, "dur": 2.277, + "args": { + "External id": 295003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100551.490, "dur": 1.180, + "args": { + "External id": 295004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100551.897, "dur": 0.682, + "args": { + "External id": 295005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100556.819, "dur": 3.876, + "args": { + "External id": 295006,"Record function id": 0, "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100558.096, "dur": 2.175, + "args": { + "External id": 295007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100558.771, "dur": 1.050, + "args": { + "External id": 295008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100559.301, "dur": 0.439, + "args": { + "External id": 295009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100564.352, "dur": 3.410, + "args": { + "External id": 295010,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100565.389, "dur": 1.974, + "args": { + "External id": 295011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100566.135, "dur": 0.824, + "args": { + "External id": 295012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100566.461, "dur": 0.434, + "args": { + "External id": 295013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100571.146, "dur": 4.223, + "args": { + "External id": 295014,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100572.461, "dur": 2.489, + "args": { + "External id": 295015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100572.908, "dur": 1.640, + "args": { + "External id": 295016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100573.828, "dur": 0.647, + "args": { + "External id": 295017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100578.865, "dur": 4.433, + "args": { + "External id": 295018,"Record function id": 0, "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100580.163, "dur": 2.686, + "args": { + "External id": 295019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100580.619, "dur": 1.775, + "args": { + "External id": 295020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100581.672, "dur": 0.658, + "args": { + "External id": 295021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100586.583, "dur": 5.955, + "args": { + "External id": 295022,"Record function id": 0, "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100587.895, "dur": 4.214, + "args": { + "External id": 295023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100588.341, "dur": 3.353, + "args": { + "External id": 295024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100588.927, "dur": 2.697, + "args": { + "External id": 295025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100595.831, "dur": 3.624, + "args": { + "External id": 295026,"Record function id": 0, "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100596.675, "dur": 2.344, + "args": { + "External id": 295027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100597.245, "dur": 1.343, + "args": { + "External id": 295028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100597.991, "dur": 0.527, + "args": { + "External id": 295029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100602.704, "dur": 4.035, + "args": { + "External id": 295030,"Record function id": 0, "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368100603.938, "dur": 2.392, + "args": { + "External id": 295031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100604.451, "dur": 1.433, + "args": { + "External id": 295032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368100605.372, "dur": 0.445, + "args": { + "External id": 295033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368100610.611, "dur": 36208.506, + "args": { + "External id": 295034,"Record function id": 0, "Sequence number": 1209203, "Fwd thread id": 1, "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368100611.804, "dur": 36198.790, + "args": { + "External id": 295035,"Sequence number": 1209203, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1658 + } + }, + { + "ph": "f", "id": 29, "pid": 2070554, "tid": 2107619, "ts": 5333368100611.804, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2070554, "tid": 2107619, + "ts": 5333368100676.506, "dur": 39.170, + "args": { + "External id": 295036,"Record function id": 0, "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2070554, "tid": 2107619, + "ts": 5333368100723.348, "dur": 61.716, + "args": { + "External id": 295037,"Record function id": 0, "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2070554, "tid": 2107619, + "ts": 5333368100790.999, "dur": 36011.518, + "args": { + "External id": 295038,"Record function id": 0, "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368100875.825, "dur": 6.940, + "args": { + "External id": 295039,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368100892.254, "dur": 5.163, + "args": { + "External id": 295040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368100916.000, "dur": 35072.045, + "args": { + "External id": 295041,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368100928.836, "dur": 35050.887, + "args": { + "External id": 295042,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368100967.491, "dur": 17.161, + "args": { + "External id": 295043,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368100990.723, "dur": 34951.476, + "args": { + "External id": 295044,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368100993.051, "dur": 34948.510, + "args": { + "External id": 295045,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368100997.688, "dur": 4.812, + "args": { + "External id": 295046,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368101004.034, "dur": 34933.555, + "args": { + "External id": 295047,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368136073.343, "dur": 8.344, + "args": { + "External id": 295048,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368136075.778, "dur": 5.603, + "args": { + "External id": 295049,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368136108.059, "dur": 362.673, + "args": { + "External id": 295050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368136133.151, "dur": 332.911, + "args": { + "External id": 295051,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1674, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368136144.600, "dur": 315.735, + "args": { + "External id": 295052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368136491.829, "dur": 2.407, + "args": { + "External id": 295053,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1676, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136553.855, "dur": 6.595, + "args": { + "External id": 295054,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136605.980, "dur": 1.391, + "args": { + "External id": 295055,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136668.264, "dur": 1.953, + "args": { + "External id": 295056,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136685.670, "dur": 0.866, + "args": { + "External id": 295057,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136697.301, "dur": 2.067, + "args": { + "External id": 295058,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136708.388, "dur": 0.782, + "args": { + "External id": 295059,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136719.114, "dur": 0.863, + "args": { + "External id": 295060,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136729.819, "dur": 1.621, + "args": { + "External id": 295061,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368136740.957, "dur": 2.510, + "args": { + "External id": 295062,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368136834.125, "dur": 2646.198, + "args": { + "External id": 295063,"Record function id": 0, "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2070554, "tid": 2107619, + "ts": 5333368136853.009, "dur": 974.594, + "args": { + "External id": 295064,"Record function id": 0, "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070554, "tid": 2107619, + "ts": 5333368136868.065, "dur": 314.911, + "args": { + "External id": 295065,"Record function id": 0, "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136945.338, "dur": 3.885, + "args": { + "External id": 295066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136952.198, "dur": 0.680, + "args": { + "External id": 295067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136955.053, "dur": 0.645, + "args": { + "External id": 295068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136957.539, "dur": 0.781, + "args": { + "External id": 295069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136960.099, "dur": 0.563, + "args": { + "External id": 295070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136962.448, "dur": 0.503, + "args": { + "External id": 295071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136967.849, "dur": 2.519, + "args": { + "External id": 295072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136971.806, "dur": 0.531, + "args": { + "External id": 295073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136973.813, "dur": 0.597, + "args": { + "External id": 295074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368136975.992, "dur": 0.616, + "args": { + "External id": 295075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368136993.443, "dur": 143.477, + "args": { + "External id": 295076,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368137008.598, "dur": 124.400, + "args": { + "External id": 295077,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368137023.841, "dur": 11.650, + "args": { + "External id": 295078,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368137038.917, "dur": 67.115, + "args": { + "External id": 295079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368137041.130, "dur": 64.508, + "args": { + "External id": 295080,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137045.204, "dur": 6.546, + "args": { + "External id": 295081,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368137053.710, "dur": 51.093, + "args": { + "External id": 295082,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2070554, "tid": 2107619, + "ts": 5333368137265.666, "dur": 554.706, + "args": { + "External id": 295083,"Record function id": 0, "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070554, "tid": 2107619, + "ts": 5333368137281.105, "dur": 526.966, + "args": { + "External id": 295084,"Record function id": 0, "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368137334.303, "dur": 5.048, + "args": { + "External id": 295085,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368137354.446, "dur": 26.032, + "args": { + "External id": 295086,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137359.154, "dur": 1.353, + "args": { + "External id": 295087,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137361.941, "dur": 2.232, + "args": { + "External id": 295088,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137365.576, "dur": 0.262, + "args": { + "External id": 295089,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137366.688, "dur": 0.328, + "args": { + "External id": 295090,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137369.565, "dur": 0.422, + "args": { + "External id": 295091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137370.988, "dur": 0.437, + "args": { + "External id": 295092,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137372.631, "dur": 0.236, + "args": { + "External id": 295093,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137374.785, "dur": 0.282, + "args": { + "External id": 295094,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137376.466, "dur": 0.224, + "args": { + "External id": 295095,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368137389.065, "dur": 33.376, + "args": { + "External id": 295096,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368137451.646, "dur": 95.996, + "args": { + "External id": 295097,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368137460.785, "dur": 4.674, + "args": { + "External id": 295098,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368137470.144, "dur": 8.800, + "args": { + "External id": 295099,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368137474.036, "dur": 4.501, + "args": { + "External id": 295100,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137476.901, "dur": 0.414, + "args": { + "External id": 295101,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368137485.917, "dur": 20.472, + "args": { + "External id": 295102,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137487.337, "dur": 0.593, + "args": { + "External id": 295103,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137490.187, "dur": 0.287, + "args": { + "External id": 295104,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137491.840, "dur": 0.163, + "args": { + "External id": 295105,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137493.037, "dur": 1.333, + "args": { + "External id": 295106,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137495.572, "dur": 0.284, + "args": { + "External id": 295107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137496.749, "dur": 0.379, + "args": { + "External id": 295108,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137498.710, "dur": 1.549, + "args": { + "External id": 295109,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137501.234, "dur": 0.280, + "args": { + "External id": 295110,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368137502.866, "dur": 0.447, + "args": { + "External id": 295111,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368137520.473, "dur": 20.335, + "args": { + "External id": 295112,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368137588.092, "dur": 150.148, + "args": { + "External id": 295113,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368137611.468, "dur": 123.070, + "args": { + "External id": 295114,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1737, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368137656.350, "dur": 74.015, + "args": { + "External id": 295115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368137754.454, "dur": 1.669, + "args": { + "External id": 295116,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1739, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368137834.928, "dur": 1623.606, + "args": { + "External id": 295117,"Sequence number": 1209202, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1740 + } + }, + { + "ph": "f", "id": 30, "pid": 2070554, "tid": 2107619, "ts": 5333368137834.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368137944.891, "dur": 101.174, + "args": { + "External id": 295118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368138085.051, "dur": 37.744, + "args": { + "External id": 295119,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368138139.741, "dur": 72.755, + "args": { + "External id": 295120,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368138225.134, "dur": 38.949, + "args": { + "External id": 295121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368138270.605, "dur": 45.337, + "args": { + "External id": 295122,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368138324.362, "dur": 28.352, + "args": { + "External id": 295123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368138361.273, "dur": 42.058, + "args": { + "External id": 295124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368138428.367, "dur": 23.669, + "args": { + "External id": 295125,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368138473.429, "dur": 28.221, + "args": { + "External id": 295126,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368138526.466, "dur": 17.959, + "args": { + "External id": 295127,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368138557.764, "dur": 12.784, + "args": { + "External id": 295128,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368138583.768, "dur": 32.067, + "args": { + "External id": 295129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368138659.921, "dur": 41.841, + "args": { + "External id": 295130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368138740.997, "dur": 169.442, + "args": { + "External id": 295131,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368138816.210, "dur": 6.008, + "args": { + "External id": 295132,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368138823.986, "dur": 8.102, + "args": { + "External id": 295133,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368138941.914, "dur": 25.224, + "args": { + "External id": 295134,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368138978.933, "dur": 15.231, + "args": { + "External id": 295135,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368139002.745, "dur": 36.238, + "args": { + "External id": 295136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368139043.794, "dur": 35.439, + "args": { + "External id": 295137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368139085.948, "dur": 23.562, + "args": { + "External id": 295138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368139114.720, "dur": 29.803, + "args": { + "External id": 295139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368139149.970, "dur": 38.634, + "args": { + "External id": 295140,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368139197.566, "dur": 48.969, + "args": { + "External id": 295141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368139271.891, "dur": 26.451, + "args": { + "External id": 295142,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368139316.232, "dur": 23.716, + "args": { + "External id": 295143,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368139352.146, "dur": 16.987, + "args": { + "External id": 295144,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368139383.348, "dur": 17.663, + "args": { + "External id": 295145,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368139413.722, "dur": 15.188, + "args": { + "External id": 295146,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139501.774, "dur": 16.933, + "args": { + "External id": 295147,"Record function id": 0, "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139504.767, "dur": 13.092, + "args": { + "External id": 295148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139511.893, "dur": 4.880, + "args": { + "External id": 295149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139513.205, "dur": 3.484, + "args": { + "External id": 295150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139522.703, "dur": 4.190, + "args": { + "External id": 295151,"Record function id": 0, "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139523.978, "dur": 2.422, + "args": { + "External id": 295152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139524.628, "dur": 1.299, + "args": { + "External id": 295153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139524.940, "dur": 0.899, + "args": { + "External id": 295154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139530.342, "dur": 4.360, + "args": { + "External id": 295155,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139531.591, "dur": 2.679, + "args": { + "External id": 295156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139532.243, "dur": 1.602, + "args": { + "External id": 295157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139532.784, "dur": 0.988, + "args": { + "External id": 295158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139538.091, "dur": 3.308, + "args": { + "External id": 295159,"Record function id": 0, "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139539.011, "dur": 1.973, + "args": { + "External id": 295160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139539.615, "dur": 0.953, + "args": { + "External id": 295161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139540.056, "dur": 0.439, + "args": { + "External id": 295162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139544.789, "dur": 4.986, + "args": { + "External id": 295163,"Record function id": 0, "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139545.741, "dur": 3.627, + "args": { + "External id": 295164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139546.270, "dur": 2.687, + "args": { + "External id": 295165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139546.682, "dur": 2.205, + "args": { + "External id": 295166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139552.908, "dur": 3.612, + "args": { + "External id": 295167,"Record function id": 0, "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139554.034, "dur": 2.056, + "args": { + "External id": 295168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139554.520, "dur": 1.167, + "args": { + "External id": 295169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139554.998, "dur": 0.617, + "args": { + "External id": 295170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139559.884, "dur": 3.630, + "args": { + "External id": 295171,"Record function id": 0, "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139560.975, "dur": 2.100, + "args": { + "External id": 295172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139561.474, "dur": 1.141, + "args": { + "External id": 295173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139561.867, "dur": 0.682, + "args": { + "External id": 295174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139569.607, "dur": 3.711, + "args": { + "External id": 295175,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139570.735, "dur": 2.191, + "args": { + "External id": 295176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139571.276, "dur": 1.214, + "args": { + "External id": 295177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139571.722, "dur": 0.677, + "args": { + "External id": 295178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139576.526, "dur": 3.825, + "args": { + "External id": 295179,"Record function id": 0, "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368139577.739, "dur": 2.186, + "args": { + "External id": 295180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139578.382, "dur": 1.138, + "args": { + "External id": 295181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368139578.700, "dur": 0.747, + "args": { + "External id": 295182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368139583.790, "dur": 36808.920, + "args": { + "External id": 295183,"Record function id": 0, "Sequence number": 1209201, "Fwd thread id": 1, "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368139584.894, "dur": 36799.478, + "args": { + "External id": 295184,"Sequence number": 1209201, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1807 + } + }, + { + "ph": "f", "id": 31, "pid": 2070554, "tid": 2107619, "ts": 5333368139584.894, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2070554, "tid": 2107619, + "ts": 5333368139613.436, "dur": 77.342, + "args": { + "External id": 295185,"Record function id": 0, "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2070554, "tid": 2107619, + "ts": 5333368139699.553, "dur": 67.685, + "args": { + "External id": 295186,"Record function id": 0, "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2070554, "tid": 2107619, + "ts": 5333368139772.962, "dur": 36603.687, + "args": { + "External id": 295187,"Record function id": 0, "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368139861.813, "dur": 7.730, + "args": { + "External id": 295188,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368139879.247, "dur": 4.870, + "args": { + "External id": 295189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368139897.630, "dur": 35645.841, + "args": { + "External id": 295190,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368139910.917, "dur": 35624.271, + "args": { + "External id": 295191,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368139950.956, "dur": 14.471, + "args": { + "External id": 295192,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368139971.660, "dur": 35524.875, + "args": { + "External id": 295193,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368139973.975, "dur": 35521.880, + "args": { + "External id": 295194,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368139977.485, "dur": 5.816, + "args": { + "External id": 295195,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368139984.839, "dur": 35507.312, + "args": { + "External id": 295196,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368175659.731, "dur": 9.495, + "args": { + "External id": 295197,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368175662.399, "dur": 6.388, + "args": { + "External id": 295198,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368175699.579, "dur": 384.649, + "args": { + "External id": 295199,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368175722.910, "dur": 356.711, + "args": { + "External id": 295200,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1823, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368175734.720, "dur": 339.700, + "args": { + "External id": 295201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368176099.975, "dur": 2.283, + "args": { + "External id": 295202,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1825, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176161.542, "dur": 20.812, + "args": { + "External id": 295203,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176228.571, "dur": 1.549, + "args": { + "External id": 295204,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176245.591, "dur": 0.901, + "args": { + "External id": 295205,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176259.122, "dur": 2.178, + "args": { + "External id": 295206,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176272.229, "dur": 0.891, + "args": { + "External id": 295207,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176282.729, "dur": 0.891, + "args": { + "External id": 295208,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176293.764, "dur": 0.958, + "args": { + "External id": 295209,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176305.330, "dur": 3.194, + "args": { + "External id": 295210,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176317.901, "dur": 0.687, + "args": { + "External id": 295211,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368176408.376, "dur": 2682.597, + "args": { + "External id": 295212,"Record function id": 0, "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2070554, "tid": 2107619, + "ts": 5333368176427.569, "dur": 1019.852, + "args": { + "External id": 295213,"Record function id": 0, "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070554, "tid": 2107619, + "ts": 5333368176441.731, "dur": 358.018, + "args": { + "External id": 295214,"Record function id": 0, "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176520.095, "dur": 4.216, + "args": { + "External id": 295215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176527.889, "dur": 0.685, + "args": { + "External id": 295216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176530.665, "dur": 0.787, + "args": { + "External id": 295217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176533.278, "dur": 0.636, + "args": { + "External id": 295218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176535.891, "dur": 2.144, + "args": { + "External id": 295219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176539.905, "dur": 0.702, + "args": { + "External id": 295220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176542.329, "dur": 1.729, + "args": { + "External id": 295221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176545.717, "dur": 0.583, + "args": { + "External id": 295222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176547.833, "dur": 0.588, + "args": { + "External id": 295223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368176550.073, "dur": 0.659, + "args": { + "External id": 295224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368176569.366, "dur": 195.268, + "args": { + "External id": 295225,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368176584.829, "dur": 174.933, + "args": { + "External id": 295226,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368176599.554, "dur": 12.252, + "args": { + "External id": 295227,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368176615.422, "dur": 112.086, + "args": { + "External id": 295228,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368176617.721, "dur": 109.417, + "args": { + "External id": 295229,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176658.677, "dur": 8.250, + "args": { + "External id": 295230,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368176668.790, "dur": 57.588, + "args": { + "External id": 295231,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2070554, "tid": 2107619, + "ts": 5333368176881.067, "dur": 558.340, + "args": { + "External id": 295232,"Record function id": 0, "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070554, "tid": 2107619, + "ts": 5333368176897.841, "dur": 530.126, + "args": { + "External id": 295233,"Record function id": 0, "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368176952.473, "dur": 6.627, + "args": { + "External id": 295234,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368176977.623, "dur": 25.984, + "args": { + "External id": 295235,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176982.110, "dur": 1.456, + "args": { + "External id": 295236,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176985.405, "dur": 0.874, + "args": { + "External id": 295237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176987.573, "dur": 0.502, + "args": { + "External id": 295238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176989.110, "dur": 0.426, + "args": { + "External id": 295239,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176991.378, "dur": 0.250, + "args": { + "External id": 295240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176992.621, "dur": 0.269, + "args": { + "External id": 295241,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176994.211, "dur": 0.288, + "args": { + "External id": 295242,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176995.777, "dur": 2.647, + "args": { + "External id": 295243,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368176999.523, "dur": 0.341, + "args": { + "External id": 295244,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368177012.209, "dur": 34.224, + "args": { + "External id": 295245,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368177076.954, "dur": 114.577, + "args": { + "External id": 295246,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368177086.648, "dur": 3.254, + "args": { + "External id": 295247,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368177094.527, "dur": 9.872, + "args": { + "External id": 295248,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368177098.875, "dur": 5.085, + "args": { + "External id": 295249,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177102.060, "dur": 0.549, + "args": { + "External id": 295250,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368177110.458, "dur": 22.596, + "args": { + "External id": 295251,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177112.374, "dur": 0.515, + "args": { + "External id": 295252,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177114.850, "dur": 0.487, + "args": { + "External id": 295253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177116.735, "dur": 0.442, + "args": { + "External id": 295254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177118.288, "dur": 0.890, + "args": { + "External id": 295255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177120.369, "dur": 2.090, + "args": { + "External id": 295256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177123.640, "dur": 0.412, + "args": { + "External id": 295257,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177126.369, "dur": 0.330, + "args": { + "External id": 295258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177127.728, "dur": 0.422, + "args": { + "External id": 295259,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368177129.862, "dur": 0.330, + "args": { + "External id": 295260,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368177142.446, "dur": 20.914, + "args": { + "External id": 295261,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368177238.870, "dur": 123.136, + "args": { + "External id": 295262,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368177267.653, "dur": 91.185, + "args": { + "External id": 295263,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1886, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368177276.970, "dur": 77.717, + "args": { + "External id": 295264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368177379.646, "dur": 1.883, + "args": { + "External id": 295265,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1888, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368177454.507, "dur": 1615.987, + "args": { + "External id": 295266,"Sequence number": 1209200, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1889 + } + }, + { + "ph": "f", "id": 32, "pid": 2070554, "tid": 2107619, "ts": 5333368177454.507, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368177558.371, "dur": 144.892, + "args": { + "External id": 295267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368177746.807, "dur": 38.949, + "args": { + "External id": 295268,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368177807.897, "dur": 55.224, + "args": { + "External id": 295269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368177872.766, "dur": 37.453, + "args": { + "External id": 295270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368177917.701, "dur": 45.171, + "args": { + "External id": 295271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368177969.527, "dur": 27.586, + "args": { + "External id": 295272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178003.141, "dur": 41.982, + "args": { + "External id": 295273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368178067.633, "dur": 22.353, + "args": { + "External id": 295274,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368178110.709, "dur": 28.273, + "args": { + "External id": 295275,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368178158.948, "dur": 36.677, + "args": { + "External id": 295276,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368178212.463, "dur": 15.990, + "args": { + "External id": 295277,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178237.635, "dur": 32.994, + "args": { + "External id": 295278,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178274.221, "dur": 33.704, + "args": { + "External id": 295279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368178335.602, "dur": 172.052, + "args": { + "External id": 295280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368178413.593, "dur": 5.534, + "args": { + "External id": 295281,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368178420.886, "dur": 2.768, + "args": { + "External id": 295282,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368178536.992, "dur": 23.558, + "args": { + "External id": 295283,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368178571.455, "dur": 14.122, + "args": { + "External id": 295284,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178593.176, "dur": 72.393, + "args": { + "External id": 295285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178673.922, "dur": 39.838, + "args": { + "External id": 295286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178721.328, "dur": 22.691, + "args": { + "External id": 295287,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178748.989, "dur": 30.746, + "args": { + "External id": 295288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178785.314, "dur": 21.208, + "args": { + "External id": 295289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368178813.178, "dur": 28.875, + "args": { + "External id": 295290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368178860.127, "dur": 38.465, + "args": { + "External id": 295291,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368178923.475, "dur": 26.073, + "args": { + "External id": 295292,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368178963.047, "dur": 17.359, + "args": { + "External id": 295293,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368178997.772, "dur": 15.559, + "args": { + "External id": 295294,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368179025.798, "dur": 14.918, + "args": { + "External id": 295295,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179112.520, "dur": 13.880, + "args": { + "External id": 295296,"Record function id": 0, "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179115.462, "dur": 9.974, + "args": { + "External id": 295297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179119.258, "dur": 5.204, + "args": { + "External id": 295298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179120.502, "dur": 3.861, + "args": { + "External id": 295299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179130.121, "dur": 3.797, + "args": { + "External id": 295300,"Record function id": 0, "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179131.329, "dur": 2.122, + "args": { + "External id": 295301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179131.879, "dur": 1.085, + "args": { + "External id": 295302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179132.197, "dur": 0.673, + "args": { + "External id": 295303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179137.169, "dur": 3.567, + "args": { + "External id": 295304,"Record function id": 0, "Ev Idx": 1927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179138.173, "dur": 2.159, + "args": { + "External id": 295305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179138.691, "dur": 1.216, + "args": { + "External id": 295306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179139.242, "dur": 0.576, + "args": { + "External id": 295307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179143.941, "dur": 3.836, + "args": { + "External id": 295308,"Record function id": 0, "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179145.152, "dur": 2.228, + "args": { + "External id": 295309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179145.647, "dur": 1.326, + "args": { + "External id": 295310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179146.222, "dur": 0.684, + "args": { + "External id": 295311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179150.831, "dur": 4.839, + "args": { + "External id": 295312,"Record function id": 0, "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179151.828, "dur": 3.412, + "args": { + "External id": 295313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179152.337, "dur": 2.511, + "args": { + "External id": 295314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179152.991, "dur": 1.789, + "args": { + "External id": 295315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179158.777, "dur": 3.900, + "args": { + "External id": 295316,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179159.857, "dur": 2.365, + "args": { + "External id": 295317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179160.328, "dur": 1.499, + "args": { + "External id": 295318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179161.020, "dur": 0.713, + "args": { + "External id": 295319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179181.946, "dur": 7.038, + "args": { + "External id": 295320,"Record function id": 0, "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179184.413, "dur": 3.877, + "args": { + "External id": 295321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179185.393, "dur": 2.041, + "args": { + "External id": 295322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179185.913, "dur": 1.326, + "args": { + "External id": 295323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179193.766, "dur": 3.545, + "args": { + "External id": 295324,"Record function id": 0, "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179195.072, "dur": 1.799, + "args": { + "External id": 295325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179195.548, "dur": 0.939, + "args": { + "External id": 295326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179195.908, "dur": 0.515, + "args": { + "External id": 295327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179201.138, "dur": 4.054, + "args": { + "External id": 295328,"Record function id": 0, "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368179202.457, "dur": 2.278, + "args": { + "External id": 295329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179202.981, "dur": 1.358, + "args": { + "External id": 295330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368179203.828, "dur": 0.447, + "args": { + "External id": 295331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368179209.397, "dur": 37105.332, + "args": { + "External id": 295332,"Record function id": 0, "Sequence number": 1209199, "Fwd thread id": 1, "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368179210.744, "dur": 37095.390, + "args": { + "External id": 295333,"Sequence number": 1209199, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1956 + } + }, + { + "ph": "f", "id": 33, "pid": 2070554, "tid": 2107619, "ts": 5333368179210.744, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2070554, "tid": 2107619, + "ts": 5333368179239.777, "dur": 37.903, + "args": { + "External id": 295334,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2070554, "tid": 2107619, + "ts": 5333368179285.010, "dur": 61.239, + "args": { + "External id": 295335,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2070554, "tid": 2107619, + "ts": 5333368179352.401, "dur": 36945.876, + "args": { + "External id": 295336,"Record function id": 0, "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368179437.529, "dur": 6.888, + "args": { + "External id": 295337,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368179453.325, "dur": 4.472, + "args": { + "External id": 295338,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368179472.269, "dur": 36023.327, + "args": { + "External id": 295339,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368179485.220, "dur": 36001.537, + "args": { + "External id": 295340,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368179519.917, "dur": 13.066, + "args": { + "External id": 295341,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368179538.881, "dur": 35909.350, + "args": { + "External id": 295342,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368179542.063, "dur": 35905.443, + "args": { + "External id": 295343,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368179545.606, "dur": 5.879, + "args": { + "External id": 295344,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368179553.543, "dur": 35890.388, + "args": { + "External id": 295345,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368215578.518, "dur": 9.039, + "args": { + "External id": 295346,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368215581.107, "dur": 6.214, + "args": { + "External id": 295347,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368215615.357, "dur": 386.639, + "args": { + "External id": 295348,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368215668.696, "dur": 328.563, + "args": { + "External id": 295349,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1972, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368215680.410, "dur": 309.838, + "args": { + "External id": 295350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368216020.489, "dur": 2.267, + "args": { + "External id": 295351,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1974, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216080.463, "dur": 6.307, + "args": { + "External id": 295352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216130.876, "dur": 1.116, + "args": { + "External id": 295353,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216146.490, "dur": 1.117, + "args": { + "External id": 295354,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216158.746, "dur": 2.308, + "args": { + "External id": 295355,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216191.232, "dur": 1.217, + "args": { + "External id": 295356,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216202.925, "dur": 0.724, + "args": { + "External id": 295357,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216213.920, "dur": 0.877, + "args": { + "External id": 295358,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216224.324, "dur": 2.698, + "args": { + "External id": 295359,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216235.220, "dur": 0.723, + "args": { + "External id": 295360,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368216331.905, "dur": 2628.511, + "args": { + "External id": 295361,"Record function id": 0, "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2070554, "tid": 2107619, + "ts": 5333368216350.038, "dur": 981.774, + "args": { + "External id": 295362,"Record function id": 0, "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070554, "tid": 2107619, + "ts": 5333368216363.636, "dur": 340.293, + "args": { + "External id": 295363,"Record function id": 0, "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216436.737, "dur": 3.897, + "args": { + "External id": 295364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216443.756, "dur": 0.928, + "args": { + "External id": 295365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216446.734, "dur": 0.728, + "args": { + "External id": 295366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216449.520, "dur": 0.852, + "args": { + "External id": 295367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216452.058, "dur": 2.170, + "args": { + "External id": 295368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216455.636, "dur": 0.653, + "args": { + "External id": 295369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216457.851, "dur": 1.655, + "args": { + "External id": 295370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216460.884, "dur": 0.797, + "args": { + "External id": 295371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216463.268, "dur": 0.719, + "args": { + "External id": 295372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368216465.417, "dur": 0.653, + "args": { + "External id": 295373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368216483.335, "dur": 190.629, + "args": { + "External id": 295374,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368216498.527, "dur": 170.265, + "args": { + "External id": 295375,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368216511.550, "dur": 11.918, + "args": { + "External id": 295376,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368216527.043, "dur": 69.620, + "args": { + "External id": 295377,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368216530.366, "dur": 66.032, + "args": { + "External id": 295378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216533.632, "dur": 9.354, + "args": { + "External id": 295379,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368216544.676, "dur": 51.128, + "args": { + "External id": 295380,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2070554, "tid": 2107619, + "ts": 5333368216783.203, "dur": 541.446, + "args": { + "External id": 295381,"Record function id": 0, "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070554, "tid": 2107619, + "ts": 5333368216798.211, "dur": 511.659, + "args": { + "External id": 295382,"Record function id": 0, "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368216857.800, "dur": 6.949, + "args": { + "External id": 295383,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368216879.496, "dur": 26.440, + "args": { + "External id": 295384,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216883.566, "dur": 1.339, + "args": { + "External id": 295385,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216887.646, "dur": 0.407, + "args": { + "External id": 295386,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216889.071, "dur": 0.213, + "args": { + "External id": 295387,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216890.938, "dur": 0.415, + "args": { + "External id": 295388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216892.964, "dur": 0.343, + "args": { + "External id": 295389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216894.784, "dur": 0.341, + "args": { + "External id": 295390,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216896.343, "dur": 1.020, + "args": { + "External id": 295391,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216898.770, "dur": 1.671, + "args": { + "External id": 295392,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368216901.459, "dur": 0.340, + "args": { + "External id": 295393,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368216914.948, "dur": 32.279, + "args": { + "External id": 295394,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368216980.433, "dur": 91.798, + "args": { + "External id": 295395,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368216990.025, "dur": 3.523, + "args": { + "External id": 295396,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368216998.348, "dur": 9.228, + "args": { + "External id": 295397,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368217002.330, "dur": 4.800, + "args": { + "External id": 295398,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217005.528, "dur": 0.414, + "args": { + "External id": 295399,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368217013.795, "dur": 20.907, + "args": { + "External id": 295400,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217015.672, "dur": 0.427, + "args": { + "External id": 295401,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217017.229, "dur": 0.811, + "args": { + "External id": 295402,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217019.331, "dur": 0.432, + "args": { + "External id": 295403,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217020.924, "dur": 0.272, + "args": { + "External id": 295404,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217023.972, "dur": 1.581, + "args": { + "External id": 295405,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217026.393, "dur": 0.254, + "args": { + "External id": 295406,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217028.504, "dur": 0.181, + "args": { + "External id": 295407,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217029.822, "dur": 0.238, + "args": { + "External id": 295408,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368217031.446, "dur": 0.378, + "args": { + "External id": 295409,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368217044.903, "dur": 20.517, + "args": { + "External id": 295410,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368217113.929, "dur": 125.534, + "args": { + "External id": 295411,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368217135.810, "dur": 100.032, + "args": { + "External id": 295412,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2035, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368217145.371, "dur": 86.288, + "args": { + "External id": 295413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368217255.292, "dur": 1.928, + "args": { + "External id": 295414,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2037, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368217338.969, "dur": 1601.944, + "args": { + "External id": 295415,"Sequence number": 1209198, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2038 + } + }, + { + "ph": "f", "id": 34, "pid": 2070554, "tid": 2107619, "ts": 5333368217338.969, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368217439.947, "dur": 107.137, + "args": { + "External id": 295416,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368217590.247, "dur": 76.765, + "args": { + "External id": 295417,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368217689.002, "dur": 58.622, + "args": { + "External id": 295418,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368217757.560, "dur": 35.872, + "args": { + "External id": 295419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368217801.789, "dur": 46.430, + "args": { + "External id": 295420,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368217854.304, "dur": 30.280, + "args": { + "External id": 295421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368217891.009, "dur": 43.076, + "args": { + "External id": 295422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368217957.978, "dur": 22.615, + "args": { + "External id": 295423,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368217998.597, "dur": 26.506, + "args": { + "External id": 295424,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368218056.488, "dur": 18.336, + "args": { + "External id": 295425,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368218088.128, "dur": 12.030, + "args": { + "External id": 295426,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218109.877, "dur": 31.379, + "args": { + "External id": 295427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218144.429, "dur": 49.935, + "args": { + "External id": 295428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368218226.123, "dur": 162.936, + "args": { + "External id": 295429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368218297.281, "dur": 5.837, + "args": { + "External id": 295430,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368218304.884, "dur": 3.073, + "args": { + "External id": 295431,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368218417.957, "dur": 25.982, + "args": { + "External id": 295432,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368218454.882, "dur": 14.825, + "args": { + "External id": 295433,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218477.009, "dur": 37.617, + "args": { + "External id": 295434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218520.524, "dur": 36.122, + "args": { + "External id": 295435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218563.543, "dur": 20.065, + "args": { + "External id": 295436,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218589.106, "dur": 64.689, + "args": { + "External id": 295437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218663.825, "dur": 24.851, + "args": { + "External id": 295438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368218695.132, "dur": 30.496, + "args": { + "External id": 295439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368218744.242, "dur": 21.660, + "args": { + "External id": 295440,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368218781.313, "dur": 37.351, + "args": { + "External id": 295441,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368218839.312, "dur": 16.564, + "args": { + "External id": 295442,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368218870.630, "dur": 12.811, + "args": { + "External id": 295443,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368218896.398, "dur": 17.038, + "args": { + "External id": 295444,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368218985.020, "dur": 13.606, + "args": { + "External id": 295445,"Record function id": 0, "Ev Idx": 2068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368218988.076, "dur": 9.728, + "args": { + "External id": 295446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368218992.027, "dur": 4.953, + "args": { + "External id": 295447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368218993.150, "dur": 3.700, + "args": { + "External id": 295448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219002.352, "dur": 4.147, + "args": { + "External id": 295449,"Record function id": 0, "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219003.709, "dur": 2.303, + "args": { + "External id": 295450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219004.320, "dur": 1.242, + "args": { + "External id": 295451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219004.666, "dur": 0.799, + "args": { + "External id": 295452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219009.740, "dur": 4.160, + "args": { + "External id": 295453,"Record function id": 0, "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219010.812, "dur": 2.670, + "args": { + "External id": 295454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219011.332, "dur": 1.700, + "args": { + "External id": 295455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219011.894, "dur": 1.054, + "args": { + "External id": 295456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219017.114, "dur": 3.447, + "args": { + "External id": 295457,"Record function id": 0, "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219018.060, "dur": 2.074, + "args": { + "External id": 295458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219018.602, "dur": 1.113, + "args": { + "External id": 295459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219019.011, "dur": 0.627, + "args": { + "External id": 295460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219023.628, "dur": 5.070, + "args": { + "External id": 295461,"Record function id": 0, "Ev Idx": 2084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219024.635, "dur": 3.625, + "args": { + "External id": 295462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219025.217, "dur": 2.643, + "args": { + "External id": 295463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219025.877, "dur": 1.900, + "args": { + "External id": 295464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219031.784, "dur": 3.974, + "args": { + "External id": 295465,"Record function id": 0, "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219032.653, "dur": 2.645, + "args": { + "External id": 295466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219033.397, "dur": 1.501, + "args": { + "External id": 295467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219034.082, "dur": 0.748, + "args": { + "External id": 295468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219038.956, "dur": 3.771, + "args": { + "External id": 295469,"Record function id": 0, "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219039.798, "dur": 2.503, + "args": { + "External id": 295470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219040.286, "dur": 1.583, + "args": { + "External id": 295471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219041.145, "dur": 0.651, + "args": { + "External id": 295472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219045.976, "dur": 3.668, + "args": { + "External id": 295473,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219046.962, "dur": 2.234, + "args": { + "External id": 295474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219047.466, "dur": 1.322, + "args": { + "External id": 295475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219048.008, "dur": 0.712, + "args": { + "External id": 295476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219053.120, "dur": 3.250, + "args": { + "External id": 295477,"Record function id": 0, "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368219054.111, "dur": 1.836, + "args": { + "External id": 295478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219054.613, "dur": 0.929, + "args": { + "External id": 295479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368219054.938, "dur": 0.539, + "args": { + "External id": 295480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368219060.182, "dur": 36838.402, + "args": { + "External id": 295481,"Record function id": 0, "Sequence number": 1209197, "Fwd thread id": 1, "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368219061.286, "dur": 36828.748, + "args": { + "External id": 295482,"Sequence number": 1209197, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2105 + } + }, + { + "ph": "f", "id": 35, "pid": 2070554, "tid": 2107619, "ts": 5333368219061.286, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2070554, "tid": 2107619, + "ts": 5333368219090.379, "dur": 38.526, + "args": { + "External id": 295483,"Record function id": 0, "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2070554, "tid": 2107619, + "ts": 5333368219135.965, "dur": 79.221, + "args": { + "External id": 295484,"Record function id": 0, "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2070554, "tid": 2107619, + "ts": 5333368219223.178, "dur": 36659.711, + "args": { + "External id": 295485,"Record function id": 0, "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368219310.013, "dur": 7.187, + "args": { + "External id": 295486,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368219327.484, "dur": 4.973, + "args": { + "External id": 295487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368219346.683, "dur": 35718.422, + "args": { + "External id": 295488,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368219359.521, "dur": 35697.674, + "args": { + "External id": 295489,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368219390.775, "dur": 13.666, + "args": { + "External id": 295490,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368219410.809, "dur": 35611.282, + "args": { + "External id": 295491,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368219413.104, "dur": 35608.351, + "args": { + "External id": 295492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368219416.581, "dur": 6.701, + "args": { + "External id": 295493,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368219425.140, "dur": 35592.606, + "args": { + "External id": 295494,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368255146.745, "dur": 8.429, + "args": { + "External id": 295495,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368255149.132, "dur": 5.714, + "args": { + "External id": 295496,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368255192.171, "dur": 378.855, + "args": { + "External id": 295497,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368255217.262, "dur": 348.880, + "args": { + "External id": 295498,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2121, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368255228.629, "dur": 332.162, + "args": { + "External id": 295499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368255588.851, "dur": 2.407, + "args": { + "External id": 295500,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2123, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255682.948, "dur": 10.163, + "args": { + "External id": 295501,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255735.184, "dur": 1.327, + "args": { + "External id": 295502,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255754.049, "dur": 1.018, + "args": { + "External id": 295503,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255765.613, "dur": 2.432, + "args": { + "External id": 295504,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255778.092, "dur": 1.107, + "args": { + "External id": 295505,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255789.300, "dur": 0.709, + "args": { + "External id": 295506,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255799.617, "dur": 0.783, + "args": { + "External id": 295507,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255810.803, "dur": 2.996, + "args": { + "External id": 295508,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368255822.623, "dur": 0.740, + "args": { + "External id": 295509,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368255912.425, "dur": 2581.935, + "args": { + "External id": 295510,"Record function id": 0, "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2070554, "tid": 2107619, + "ts": 5333368255931.735, "dur": 982.520, + "args": { + "External id": 295511,"Record function id": 0, "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070554, "tid": 2107619, + "ts": 5333368255945.651, "dur": 319.867, + "args": { + "External id": 295512,"Record function id": 0, "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256020.926, "dur": 3.728, + "args": { + "External id": 295513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256027.777, "dur": 0.780, + "args": { + "External id": 295514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256033.474, "dur": 0.546, + "args": { + "External id": 295515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256036.009, "dur": 0.686, + "args": { + "External id": 295516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256038.348, "dur": 1.994, + "args": { + "External id": 295517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256041.976, "dur": 0.487, + "args": { + "External id": 295518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256044.083, "dur": 1.113, + "args": { + "External id": 295519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256046.631, "dur": 0.771, + "args": { + "External id": 295520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256049.060, "dur": 0.918, + "args": { + "External id": 295521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368256051.573, "dur": 0.484, + "args": { + "External id": 295522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368256069.120, "dur": 166.805, + "args": { + "External id": 295523,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368256085.245, "dur": 146.344, + "args": { + "External id": 295524,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368256100.718, "dur": 11.811, + "args": { + "External id": 295525,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368256116.375, "dur": 84.119, + "args": { + "External id": 295526,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368256118.768, "dur": 81.425, + "args": { + "External id": 295527,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256122.176, "dur": 5.596, + "args": { + "External id": 295528,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368256129.450, "dur": 69.739, + "args": { + "External id": 295529,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2070554, "tid": 2107619, + "ts": 5333368256345.306, "dur": 561.980, + "args": { + "External id": 295530,"Record function id": 0, "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070554, "tid": 2107619, + "ts": 5333368256363.880, "dur": 530.967, + "args": { + "External id": 295531,"Record function id": 0, "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368256416.761, "dur": 7.217, + "args": { + "External id": 295532,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368256439.753, "dur": 25.223, + "args": { + "External id": 295533,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256443.695, "dur": 1.256, + "args": { + "External id": 295534,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256447.342, "dur": 0.346, + "args": { + "External id": 295535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256448.912, "dur": 0.293, + "args": { + "External id": 295536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256450.735, "dur": 0.378, + "args": { + "External id": 295537,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256452.721, "dur": 0.407, + "args": { + "External id": 295538,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256454.308, "dur": 0.333, + "args": { + "External id": 295539,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256455.646, "dur": 0.253, + "args": { + "External id": 295540,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256457.228, "dur": 2.138, + "args": { + "External id": 295541,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256461.332, "dur": 0.334, + "args": { + "External id": 295542,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368256473.744, "dur": 31.494, + "args": { + "External id": 295543,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368256535.173, "dur": 136.410, + "args": { + "External id": 295544,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368256547.664, "dur": 3.264, + "args": { + "External id": 295545,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368256555.695, "dur": 9.209, + "args": { + "External id": 295546,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368256559.701, "dur": 4.779, + "args": { + "External id": 295547,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256562.896, "dur": 0.398, + "args": { + "External id": 295548,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368256571.768, "dur": 18.515, + "args": { + "External id": 295549,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256573.344, "dur": 0.458, + "args": { + "External id": 295550,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256575.283, "dur": 0.370, + "args": { + "External id": 295551,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256576.544, "dur": 0.586, + "args": { + "External id": 295552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256578.548, "dur": 0.393, + "args": { + "External id": 295553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256580.266, "dur": 1.083, + "args": { + "External id": 295554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256582.758, "dur": 0.324, + "args": { + "External id": 295555,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256584.011, "dur": 0.404, + "args": { + "External id": 295556,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256585.941, "dur": 0.145, + "args": { + "External id": 295557,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368256587.147, "dur": 0.345, + "args": { + "External id": 295558,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368256602.244, "dur": 59.742, + "args": { + "External id": 295559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368256719.041, "dur": 111.765, + "args": { + "External id": 295560,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368256742.905, "dur": 84.765, + "args": { + "External id": 295561,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2184, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368256752.200, "dur": 71.130, + "args": { + "External id": 295562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368256844.142, "dur": 1.924, + "args": { + "External id": 295563,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2186, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368256921.029, "dur": 1554.430, + "args": { + "External id": 295564,"Sequence number": 1209196, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2187 + } + }, + { + "ph": "f", "id": 36, "pid": 2070554, "tid": 2107619, "ts": 5333368256921.029, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257025.428, "dur": 102.689, + "args": { + "External id": 295565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368257184.790, "dur": 37.677, + "args": { + "External id": 295566,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257240.087, "dur": 52.452, + "args": { + "External id": 295567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257301.232, "dur": 34.164, + "args": { + "External id": 295568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257343.428, "dur": 45.238, + "args": { + "External id": 295569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257395.639, "dur": 27.838, + "args": { + "External id": 295570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257430.193, "dur": 41.931, + "args": { + "External id": 295571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368257494.597, "dur": 22.509, + "args": { + "External id": 295572,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368257533.177, "dur": 26.548, + "args": { + "External id": 295573,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368257578.324, "dur": 19.466, + "args": { + "External id": 295574,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368257610.052, "dur": 53.324, + "args": { + "External id": 295575,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257675.394, "dur": 34.831, + "args": { + "External id": 295576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368257713.534, "dur": 33.279, + "args": { + "External id": 295577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368257775.879, "dur": 165.424, + "args": { + "External id": 295578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368257853.816, "dur": 5.933, + "args": { + "External id": 295579,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368257861.428, "dur": 2.692, + "args": { + "External id": 295580,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368257972.803, "dur": 24.476, + "args": { + "External id": 295581,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368258009.320, "dur": 13.316, + "args": { + "External id": 295582,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368258029.871, "dur": 32.997, + "args": { + "External id": 295583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368258068.609, "dur": 34.166, + "args": { + "External id": 295584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368258109.485, "dur": 21.976, + "args": { + "External id": 295585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368258137.512, "dur": 45.207, + "args": { + "External id": 295586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368258192.627, "dur": 26.854, + "args": { + "External id": 295587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368258225.069, "dur": 31.576, + "args": { + "External id": 295588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368258275.543, "dur": 22.317, + "args": { + "External id": 295589,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368258313.501, "dur": 21.679, + "args": { + "External id": 295590,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368258347.192, "dur": 29.661, + "args": { + "External id": 295591,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368258399.780, "dur": 17.012, + "args": { + "External id": 295592,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368258430.420, "dur": 15.667, + "args": { + "External id": 295593,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258515.403, "dur": 16.929, + "args": { + "External id": 295594,"Record function id": 0, "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258518.196, "dur": 13.203, + "args": { + "External id": 295595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258522.047, "dur": 8.438, + "args": { + "External id": 295596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258523.382, "dur": 7.018, + "args": { + "External id": 295597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258536.158, "dur": 4.131, + "args": { + "External id": 295598,"Record function id": 0, "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258537.283, "dur": 2.526, + "args": { + "External id": 295599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258538.014, "dur": 1.356, + "args": { + "External id": 295600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258538.687, "dur": 0.617, + "args": { + "External id": 295601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258543.608, "dur": 3.892, + "args": { + "External id": 295602,"Record function id": 0, "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258544.790, "dur": 2.202, + "args": { + "External id": 295603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258545.376, "dur": 1.193, + "args": { + "External id": 295604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258545.846, "dur": 0.633, + "args": { + "External id": 295605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258550.705, "dur": 5.349, + "args": { + "External id": 295606,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258551.863, "dur": 3.742, + "args": { + "External id": 295607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258552.359, "dur": 2.824, + "args": { + "External id": 295608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258552.858, "dur": 2.246, + "args": { + "External id": 295609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258559.130, "dur": 4.152, + "args": { + "External id": 295610,"Record function id": 0, "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258560.432, "dur": 2.417, + "args": { + "External id": 295611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258560.890, "dur": 1.535, + "args": { + "External id": 295612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258561.742, "dur": 0.612, + "args": { + "External id": 295613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258566.453, "dur": 3.726, + "args": { + "External id": 295614,"Record function id": 0, "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258567.366, "dur": 2.361, + "args": { + "External id": 295615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258568.054, "dur": 1.251, + "args": { + "External id": 295616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258568.631, "dur": 0.601, + "args": { + "External id": 295617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258573.362, "dur": 3.766, + "args": { + "External id": 295618,"Record function id": 0, "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258574.497, "dur": 2.194, + "args": { + "External id": 295619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258575.260, "dur": 1.001, + "args": { + "External id": 295620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258575.592, "dur": 0.603, + "args": { + "External id": 295621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258580.201, "dur": 3.488, + "args": { + "External id": 295622,"Record function id": 0, "Ev Idx": 2245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258581.389, "dur": 1.867, + "args": { + "External id": 295623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258581.861, "dur": 0.990, + "args": { + "External id": 295624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258582.197, "dur": 0.580, + "args": { + "External id": 295625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258586.741, "dur": 3.077, + "args": { + "External id": 295626,"Record function id": 0, "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368258587.595, "dur": 1.787, + "args": { + "External id": 295627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258588.085, "dur": 0.886, + "args": { + "External id": 295628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368258588.383, "dur": 0.522, + "args": { + "External id": 295629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368258593.609, "dur": 36394.712, + "args": { + "External id": 295630,"Record function id": 0, "Sequence number": 1209195, "Fwd thread id": 1, "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368258594.741, "dur": 36385.869, + "args": { + "External id": 295631,"Sequence number": 1209195, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2254 + } + }, + { + "ph": "f", "id": 37, "pid": 2070554, "tid": 2107619, "ts": 5333368258594.741, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2070554, "tid": 2107619, + "ts": 5333368258656.609, "dur": 36.176, + "args": { + "External id": 295632,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2070554, "tid": 2107619, + "ts": 5333368258700.804, "dur": 60.836, + "args": { + "External id": 295633,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2070554, "tid": 2107619, + "ts": 5333368258767.468, "dur": 36205.645, + "args": { + "External id": 295634,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368258852.645, "dur": 7.184, + "args": { + "External id": 295635,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368258869.589, "dur": 4.711, + "args": { + "External id": 295636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368258887.828, "dur": 35307.714, + "args": { + "External id": 295637,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368258900.764, "dur": 35286.285, + "args": { + "External id": 295638,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368258930.171, "dur": 13.974, + "args": { + "External id": 295639,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368258949.917, "dur": 35189.429, + "args": { + "External id": 295640,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368258953.046, "dur": 35185.689, + "args": { + "External id": 295641,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368258956.519, "dur": 4.907, + "args": { + "External id": 295642,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368258963.185, "dur": 35171.501, + "args": { + "External id": 295643,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368294277.406, "dur": 8.532, + "args": { + "External id": 295644,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368294279.765, "dur": 5.843, + "args": { + "External id": 295645,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368294312.536, "dur": 388.148, + "args": { + "External id": 295646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368294340.314, "dur": 355.187, + "args": { + "External id": 295647,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2270, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368294350.547, "dur": 338.818, + "args": { + "External id": 295648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368294719.990, "dur": 2.473, + "args": { + "External id": 295649,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2272, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294780.076, "dur": 6.321, + "args": { + "External id": 295650,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294829.332, "dur": 1.237, + "args": { + "External id": 295651,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294845.170, "dur": 1.367, + "args": { + "External id": 295652,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294859.538, "dur": 0.615, + "args": { + "External id": 295653,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294871.173, "dur": 0.713, + "args": { + "External id": 295654,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294881.486, "dur": 0.878, + "args": { + "External id": 295655,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294891.933, "dur": 0.718, + "args": { + "External id": 295656,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294903.421, "dur": 1.716, + "args": { + "External id": 295657,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368294914.092, "dur": 0.701, + "args": { + "External id": 295658,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368295002.412, "dur": 2579.826, + "args": { + "External id": 295659,"Record function id": 0, "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2070554, "tid": 2107619, + "ts": 5333368295021.443, "dur": 967.676, + "args": { + "External id": 295660,"Record function id": 0, "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070554, "tid": 2107619, + "ts": 5333368295034.778, "dur": 316.410, + "args": { + "External id": 295661,"Record function id": 0, "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295112.948, "dur": 3.992, + "args": { + "External id": 295662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295119.737, "dur": 0.547, + "args": { + "External id": 295663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295122.214, "dur": 1.003, + "args": { + "External id": 295664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295124.644, "dur": 1.678, + "args": { + "External id": 295665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295127.694, "dur": 0.516, + "args": { + "External id": 295666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295129.409, "dur": 0.640, + "args": { + "External id": 295667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295131.395, "dur": 1.577, + "args": { + "External id": 295668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295134.197, "dur": 0.526, + "args": { + "External id": 295669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295136.033, "dur": 0.629, + "args": { + "External id": 295670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368295137.968, "dur": 0.703, + "args": { + "External id": 295671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368295156.108, "dur": 165.613, + "args": { + "External id": 295672,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368295190.609, "dur": 126.218, + "args": { + "External id": 295673,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368295203.831, "dur": 12.082, + "args": { + "External id": 295674,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368295219.875, "dur": 68.679, + "args": { + "External id": 295675,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368295222.315, "dur": 65.965, + "args": { + "External id": 295676,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295225.762, "dur": 6.844, + "args": { + "External id": 295677,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368295236.176, "dur": 51.496, + "args": { + "External id": 295678,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2070554, "tid": 2107619, + "ts": 5333368295427.483, "dur": 554.445, + "args": { + "External id": 295679,"Record function id": 0, "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070554, "tid": 2107619, + "ts": 5333368295444.290, "dur": 525.440, + "args": { + "External id": 295680,"Record function id": 0, "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368295501.837, "dur": 4.220, + "args": { + "External id": 295681,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368295521.292, "dur": 22.125, + "args": { + "External id": 295682,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295525.461, "dur": 1.702, + "args": { + "External id": 295683,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295528.608, "dur": 0.432, + "args": { + "External id": 295684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295529.782, "dur": 0.694, + "args": { + "External id": 295685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295531.560, "dur": 0.565, + "args": { + "External id": 295686,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295533.345, "dur": 0.604, + "args": { + "External id": 295687,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295534.764, "dur": 0.319, + "args": { + "External id": 295688,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295535.914, "dur": 1.380, + "args": { + "External id": 295689,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295538.260, "dur": 0.392, + "args": { + "External id": 295690,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295539.572, "dur": 0.391, + "args": { + "External id": 295691,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368295555.058, "dur": 30.906, + "args": { + "External id": 295692,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368295618.384, "dur": 135.974, + "args": { + "External id": 295693,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368295667.495, "dur": 4.454, + "args": { + "External id": 295694,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368295680.374, "dur": 9.722, + "args": { + "External id": 295695,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368295684.462, "dur": 5.240, + "args": { + "External id": 295696,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295687.607, "dur": 0.627, + "args": { + "External id": 295697,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368295696.919, "dur": 17.627, + "args": { + "External id": 295698,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295698.642, "dur": 0.531, + "args": { + "External id": 295699,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295700.333, "dur": 0.436, + "args": { + "External id": 295700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295701.496, "dur": 0.381, + "args": { + "External id": 295701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295702.974, "dur": 1.960, + "args": { + "External id": 295702,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295705.899, "dur": 0.273, + "args": { + "External id": 295703,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295707.124, "dur": 0.357, + "args": { + "External id": 295704,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295708.238, "dur": 0.333, + "args": { + "External id": 295705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295709.411, "dur": 0.347, + "args": { + "External id": 295706,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368295710.568, "dur": 0.343, + "args": { + "External id": 295707,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368295723.657, "dur": 23.333, + "args": { + "External id": 295708,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368295796.872, "dur": 108.353, + "args": { + "External id": 295709,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368295817.210, "dur": 84.565, + "args": { + "External id": 295710,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2333, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368295826.437, "dur": 71.279, + "args": { + "External id": 295711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368295918.454, "dur": 1.716, + "args": { + "External id": 295712,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2335, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368295996.266, "dur": 1566.238, + "args": { + "External id": 295713,"Sequence number": 1209194, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2336 + } + }, + { + "ph": "f", "id": 38, "pid": 2070554, "tid": 2107619, "ts": 5333368295996.266, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296103.960, "dur": 128.224, + "args": { + "External id": 295714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368296273.562, "dur": 38.869, + "args": { + "External id": 295715,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296328.203, "dur": 54.610, + "args": { + "External id": 295716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296392.124, "dur": 34.293, + "args": { + "External id": 295717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296433.995, "dur": 47.262, + "args": { + "External id": 295718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296488.173, "dur": 28.604, + "args": { + "External id": 295719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296523.779, "dur": 42.419, + "args": { + "External id": 295720,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368296587.319, "dur": 22.147, + "args": { + "External id": 295721,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368296666.560, "dur": 30.088, + "args": { + "External id": 295722,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368296718.044, "dur": 19.712, + "args": { + "External id": 295723,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368296748.170, "dur": 15.998, + "args": { + "External id": 295724,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296772.253, "dur": 33.917, + "args": { + "External id": 295725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368296810.289, "dur": 33.135, + "args": { + "External id": 295726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368296871.487, "dur": 164.088, + "args": { + "External id": 295727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368296947.163, "dur": 6.233, + "args": { + "External id": 295728,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368296955.096, "dur": 2.251, + "args": { + "External id": 295729,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368297065.275, "dur": 22.561, + "args": { + "External id": 295730,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368297098.530, "dur": 13.462, + "args": { + "External id": 295731,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368297119.272, "dur": 36.155, + "args": { + "External id": 295732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368297160.382, "dur": 52.257, + "args": { + "External id": 295733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368297223.565, "dur": 23.910, + "args": { + "External id": 295734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368297252.106, "dur": 29.980, + "args": { + "External id": 295735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368297287.531, "dur": 22.233, + "args": { + "External id": 295736,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368297315.766, "dur": 29.463, + "args": { + "External id": 295737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368297362.856, "dur": 24.117, + "args": { + "External id": 295738,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368297402.274, "dur": 22.988, + "args": { + "External id": 295739,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368297437.311, "dur": 17.087, + "args": { + "External id": 295740,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368297467.779, "dur": 27.622, + "args": { + "External id": 295741,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368297514.699, "dur": 18.872, + "args": { + "External id": 295742,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297606.930, "dur": 52.061, + "args": { + "External id": 295743,"Record function id": 0, "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297609.964, "dur": 47.394, + "args": { + "External id": 295744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297613.846, "dur": 41.979, + "args": { + "External id": 295745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297615.080, "dur": 40.218, + "args": { + "External id": 295746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297665.851, "dur": 5.487, + "args": { + "External id": 295747,"Record function id": 0, "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297667.433, "dur": 3.434, + "args": { + "External id": 295748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297668.461, "dur": 1.922, + "args": { + "External id": 295749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297668.910, "dur": 1.355, + "args": { + "External id": 295750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297674.578, "dur": 5.427, + "args": { + "External id": 295751,"Record function id": 0, "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297675.948, "dur": 3.607, + "args": { + "External id": 295752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297676.502, "dur": 2.640, + "args": { + "External id": 295753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297676.906, "dur": 2.160, + "args": { + "External id": 295754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297683.252, "dur": 4.011, + "args": { + "External id": 295755,"Record function id": 0, "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297684.390, "dur": 2.470, + "args": { + "External id": 295756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297684.951, "dur": 1.518, + "args": { + "External id": 295757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297685.268, "dur": 1.096, + "args": { + "External id": 295758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297690.325, "dur": 3.722, + "args": { + "External id": 295759,"Record function id": 0, "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297691.740, "dur": 1.898, + "args": { + "External id": 295760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297692.199, "dur": 1.050, + "args": { + "External id": 295761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297692.638, "dur": 0.516, + "args": { + "External id": 295762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297697.214, "dur": 3.401, + "args": { + "External id": 295763,"Record function id": 0, "Ev Idx": 2386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297698.229, "dur": 1.978, + "args": { + "External id": 295764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297698.726, "dur": 1.095, + "args": { + "External id": 295765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297699.119, "dur": 0.608, + "args": { + "External id": 295766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297703.805, "dur": 3.296, + "args": { + "External id": 295767,"Record function id": 0, "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297704.691, "dur": 2.009, + "args": { + "External id": 295768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297705.202, "dur": 1.110, + "args": { + "External id": 295769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297705.560, "dur": 0.659, + "args": { + "External id": 295770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297710.206, "dur": 3.399, + "args": { + "External id": 295771,"Record function id": 0, "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297711.206, "dur": 1.992, + "args": { + "External id": 295772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297711.734, "dur": 1.070, + "args": { + "External id": 295773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297712.206, "dur": 0.499, + "args": { + "External id": 295774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297716.645, "dur": 3.240, + "args": { + "External id": 295775,"Record function id": 0, "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368297717.639, "dur": 1.842, + "args": { + "External id": 295776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297718.167, "dur": 0.934, + "args": { + "External id": 295777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368297718.535, "dur": 0.464, + "args": { + "External id": 295778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368297723.831, "dur": 37018.698, + "args": { + "External id": 295779,"Record function id": 0, "Sequence number": 1209193, "Fwd thread id": 1, "Ev Idx": 2402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368297725.038, "dur": 37009.365, + "args": { + "External id": 295780,"Sequence number": 1209193, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2403 + } + }, + { + "ph": "f", "id": 39, "pid": 2070554, "tid": 2107619, "ts": 5333368297725.038, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2070554, "tid": 2107619, + "ts": 5333368297756.377, "dur": 37.276, + "args": { + "External id": 295781,"Record function id": 0, "Ev Idx": 2404 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2070554, "tid": 2107619, + "ts": 5333368297801.001, "dur": 61.635, + "args": { + "External id": 295782,"Record function id": 0, "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2070554, "tid": 2107619, + "ts": 5333368297867.905, "dur": 36859.170, + "args": { + "External id": 295783,"Record function id": 0, "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368297953.600, "dur": 6.313, + "args": { + "External id": 295784,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368297969.013, "dur": 6.349, + "args": { + "External id": 295785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368297989.557, "dur": 36009.452, + "args": { + "External id": 295786,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368298001.924, "dur": 35989.138, + "args": { + "External id": 295787,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368298036.706, "dur": 13.333, + "args": { + "External id": 295788,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368298055.966, "dur": 35897.111, + "args": { + "External id": 295789,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368298058.509, "dur": 35893.945, + "args": { + "External id": 295790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368298062.119, "dur": 5.470, + "args": { + "External id": 295791,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368298069.223, "dur": 35879.492, + "args": { + "External id": 295792,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368334082.972, "dur": 7.880, + "args": { + "External id": 295793,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368334085.252, "dur": 5.285, + "args": { + "External id": 295794,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368334120.407, "dur": 298.306, + "args": { + "External id": 295795,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368334142.867, "dur": 271.022, + "args": { + "External id": 295796,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2419, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368334153.309, "dur": 255.498, + "args": { + "External id": 295797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368334438.176, "dur": 2.375, + "args": { + "External id": 295798,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2421, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334496.280, "dur": 6.129, + "args": { + "External id": 295799,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334546.069, "dur": 1.381, + "args": { + "External id": 295800,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334561.699, "dur": 2.635, + "args": { + "External id": 295801,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334574.127, "dur": 0.822, + "args": { + "External id": 295802,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334584.325, "dur": 0.641, + "args": { + "External id": 295803,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334593.828, "dur": 0.682, + "args": { + "External id": 295804,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334603.815, "dur": 2.408, + "args": { + "External id": 295805,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334615.969, "dur": 1.493, + "args": { + "External id": 295806,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334663.514, "dur": 1.555, + "args": { + "External id": 295807,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368334757.841, "dur": 2599.576, + "args": { + "External id": 295808,"Record function id": 0, "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2070554, "tid": 2107619, + "ts": 5333368334777.207, "dur": 977.168, + "args": { + "External id": 295809,"Record function id": 0, "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070554, "tid": 2107619, + "ts": 5333368334790.754, "dur": 298.654, + "args": { + "External id": 295810,"Record function id": 0, "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334867.517, "dur": 3.915, + "args": { + "External id": 295811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334874.535, "dur": 0.819, + "args": { + "External id": 295812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334877.168, "dur": 2.402, + "args": { + "External id": 295813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334881.106, "dur": 0.612, + "args": { + "External id": 295814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334883.206, "dur": 0.581, + "args": { + "External id": 295815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334885.106, "dur": 0.860, + "args": { + "External id": 295816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334887.319, "dur": 1.198, + "args": { + "External id": 295817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334889.746, "dur": 0.527, + "args": { + "External id": 295818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334891.416, "dur": 0.482, + "args": { + "External id": 295819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368334892.981, "dur": 0.543, + "args": { + "External id": 295820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368334915.112, "dur": 147.624, + "args": { + "External id": 295821,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368334930.238, "dur": 125.450, + "args": { + "External id": 295822,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368334942.755, "dur": 13.617, + "args": { + "External id": 295823,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368334959.674, "dur": 69.885, + "args": { + "External id": 295824,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368334962.388, "dur": 66.851, + "args": { + "External id": 295825,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368334965.894, "dur": 4.881, + "args": { + "External id": 295826,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368334972.221, "dur": 56.324, + "args": { + "External id": 295827,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2070554, "tid": 2107619, + "ts": 5333368335163.958, "dur": 583.686, + "args": { + "External id": 295828,"Record function id": 0, "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070554, "tid": 2107619, + "ts": 5333368335198.982, "dur": 536.542, + "args": { + "External id": 295829,"Record function id": 0, "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368335259.793, "dur": 5.857, + "args": { + "External id": 295830,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368335283.955, "dur": 23.637, + "args": { + "External id": 295831,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335288.820, "dur": 1.490, + "args": { + "External id": 295832,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335291.585, "dur": 0.788, + "args": { + "External id": 295833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335293.193, "dur": 0.511, + "args": { + "External id": 295834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335294.299, "dur": 0.358, + "args": { + "External id": 295835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335295.551, "dur": 0.463, + "args": { + "External id": 295836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335297.540, "dur": 1.965, + "args": { + "External id": 295837,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335300.431, "dur": 0.351, + "args": { + "External id": 295838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335302.220, "dur": 0.359, + "args": { + "External id": 295839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335303.616, "dur": 0.485, + "args": { + "External id": 295840,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368335317.591, "dur": 33.697, + "args": { + "External id": 295841,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368335382.762, "dur": 88.852, + "args": { + "External id": 295842,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368335392.408, "dur": 3.348, + "args": { + "External id": 295843,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368335400.567, "dur": 8.820, + "args": { + "External id": 295844,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368335404.257, "dur": 4.660, + "args": { + "External id": 295845,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335407.254, "dur": 0.526, + "args": { + "External id": 295846,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368335415.636, "dur": 17.886, + "args": { + "External id": 295847,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335417.460, "dur": 0.251, + "args": { + "External id": 295848,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335419.027, "dur": 0.447, + "args": { + "External id": 295849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335420.323, "dur": 1.767, + "args": { + "External id": 295850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335423.243, "dur": 0.271, + "args": { + "External id": 295851,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335424.386, "dur": 0.491, + "args": { + "External id": 295852,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335425.955, "dur": 0.285, + "args": { + "External id": 295853,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335427.298, "dur": 0.338, + "args": { + "External id": 295854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335428.906, "dur": 0.431, + "args": { + "External id": 295855,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368335430.081, "dur": 0.590, + "args": { + "External id": 295856,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368335443.445, "dur": 20.805, + "args": { + "External id": 295857,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368335513.532, "dur": 151.617, + "args": { + "External id": 295858,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368335536.084, "dur": 124.893, + "args": { + "External id": 295859,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2482, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368335545.120, "dur": 71.451, + "args": { + "External id": 295860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368335681.413, "dur": 2.466, + "args": { + "External id": 295861,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2484, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368335761.085, "dur": 1576.900, + "args": { + "External id": 295862,"Sequence number": 1209192, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2485 + } + }, + { + "ph": "f", "id": 40, "pid": 2070554, "tid": 2107619, "ts": 5333368335761.085, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368335869.575, "dur": 113.027, + "args": { + "External id": 295863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368336023.412, "dur": 37.362, + "args": { + "External id": 295864,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336076.003, "dur": 50.055, + "args": { + "External id": 295865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336136.403, "dur": 51.466, + "args": { + "External id": 295866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336197.365, "dur": 48.961, + "args": { + "External id": 295867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336253.298, "dur": 27.986, + "args": { + "External id": 295868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336288.677, "dur": 41.403, + "args": { + "External id": 295869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368336354.344, "dur": 24.148, + "args": { + "External id": 295870,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368336394.907, "dur": 27.705, + "args": { + "External id": 295871,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368336440.897, "dur": 20.332, + "args": { + "External id": 295872,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368336471.979, "dur": 15.083, + "args": { + "External id": 295873,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336494.307, "dur": 30.209, + "args": { + "External id": 295874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336527.338, "dur": 32.308, + "args": { + "External id": 295875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368336585.657, "dur": 204.234, + "args": { + "External id": 295876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368336698.116, "dur": 6.739, + "args": { + "External id": 295877,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368336707.243, "dur": 3.242, + "args": { + "External id": 295878,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368336820.383, "dur": 25.056, + "args": { + "External id": 295879,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368336856.653, "dur": 16.521, + "args": { + "External id": 295880,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336880.674, "dur": 39.475, + "args": { + "External id": 295881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336929.375, "dur": 35.225, + "args": { + "External id": 295882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336972.248, "dur": 21.954, + "args": { + "External id": 295883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368336998.612, "dur": 29.651, + "args": { + "External id": 295884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368337033.496, "dur": 20.770, + "args": { + "External id": 295885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368337059.789, "dur": 30.198, + "args": { + "External id": 295886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368337108.448, "dur": 22.187, + "args": { + "External id": 295887,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368337146.092, "dur": 39.390, + "args": { + "External id": 295888,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368337210.591, "dur": 30.460, + "args": { + "External id": 295889,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368337265.031, "dur": 14.916, + "args": { + "External id": 295890,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368337292.093, "dur": 14.633, + "args": { + "External id": 295891,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337382.652, "dur": 14.427, + "args": { + "External id": 295892,"Record function id": 0, "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337385.593, "dur": 10.395, + "args": { + "External id": 295893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337389.675, "dur": 5.355, + "args": { + "External id": 295894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337390.994, "dur": 3.920, + "args": { + "External id": 295895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337400.840, "dur": 4.328, + "args": { + "External id": 295896,"Record function id": 0, "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337402.129, "dur": 2.497, + "args": { + "External id": 295897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337402.739, "dur": 1.420, + "args": { + "External id": 295898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337403.373, "dur": 0.717, + "args": { + "External id": 295899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337408.474, "dur": 3.553, + "args": { + "External id": 295900,"Record function id": 0, "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337409.592, "dur": 1.965, + "args": { + "External id": 295901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337410.117, "dur": 0.977, + "args": { + "External id": 295902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337410.406, "dur": 0.598, + "args": { + "External id": 295903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337415.280, "dur": 5.273, + "args": { + "External id": 295904,"Record function id": 0, "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337416.392, "dur": 3.725, + "args": { + "External id": 295905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337416.923, "dur": 2.759, + "args": { + "External id": 295906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337417.339, "dur": 2.251, + "args": { + "External id": 295907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337423.756, "dur": 3.733, + "args": { + "External id": 295908,"Record function id": 0, "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337424.871, "dur": 2.162, + "args": { + "External id": 295909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337425.370, "dur": 1.206, + "args": { + "External id": 295910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337425.668, "dur": 0.825, + "args": { + "External id": 295911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337430.584, "dur": 5.408, + "args": { + "External id": 295912,"Record function id": 0, "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337432.999, "dur": 2.548, + "args": { + "External id": 295913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337433.764, "dur": 1.308, + "args": { + "External id": 295914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337434.377, "dur": 0.629, + "args": { + "External id": 295915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337439.322, "dur": 3.621, + "args": { + "External id": 295916,"Record function id": 0, "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337440.373, "dur": 2.128, + "args": { + "External id": 295917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337440.849, "dur": 1.206, + "args": { + "External id": 295918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337441.512, "dur": 0.443, + "args": { + "External id": 295919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337446.015, "dur": 3.684, + "args": { + "External id": 295920,"Record function id": 0, "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337447.048, "dur": 2.208, + "args": { + "External id": 295921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337447.490, "dur": 1.322, + "args": { + "External id": 295922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337448.133, "dur": 0.607, + "args": { + "External id": 295923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337452.737, "dur": 3.775, + "args": { + "External id": 295924,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368337453.972, "dur": 2.111, + "args": { + "External id": 295925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337454.448, "dur": 1.213, + "args": { + "External id": 295926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368337454.967, "dur": 0.621, + "args": { + "External id": 295927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368337460.113, "dur": 39124.612, + "args": { + "External id": 295928,"Record function id": 0, "Sequence number": 1209191, "Fwd thread id": 1, "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368337461.368, "dur": 39114.013, + "args": { + "External id": 295929,"Sequence number": 1209191, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2552 + } + }, + { + "ph": "f", "id": 41, "pid": 2070554, "tid": 2107619, "ts": 5333368337461.368, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2070554, "tid": 2107619, + "ts": 5333368337488.546, "dur": 36.083, + "args": { + "External id": 295930,"Record function id": 0, "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2070554, "tid": 2107619, + "ts": 5333368337531.633, "dur": 60.143, + "args": { + "External id": 295931,"Record function id": 0, "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2070554, "tid": 2107619, + "ts": 5333368337597.107, "dur": 38971.127, + "args": { + "External id": 295932,"Record function id": 0, "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368337719.160, "dur": 7.252, + "args": { + "External id": 295933,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368337736.942, "dur": 5.230, + "args": { + "External id": 295934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368337756.223, "dur": 38002.847, + "args": { + "External id": 295935,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368337769.306, "dur": 37981.238, + "args": { + "External id": 295936,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368337807.758, "dur": 15.361, + "args": { + "External id": 295937,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368337828.847, "dur": 37885.555, + "args": { + "External id": 295938,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368337831.243, "dur": 37882.527, + "args": { + "External id": 295939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368337835.282, "dur": 5.153, + "args": { + "External id": 295940,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368337842.100, "dur": 37868.246, + "args": { + "External id": 295941,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368375843.078, "dur": 8.806, + "args": { + "External id": 295942,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368375845.531, "dur": 6.020, + "args": { + "External id": 295943,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368375879.424, "dur": 413.470, + "args": { + "External id": 295944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368375904.567, "dur": 383.230, + "args": { + "External id": 295945,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2568, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368375915.232, "dur": 362.814, + "args": { + "External id": 295946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368376312.289, "dur": 2.463, + "args": { + "External id": 295947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2570, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376374.009, "dur": 6.027, + "args": { + "External id": 295948,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376423.957, "dur": 1.197, + "args": { + "External id": 295949,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376440.117, "dur": 1.141, + "args": { + "External id": 295950,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376452.322, "dur": 2.510, + "args": { + "External id": 295951,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376466.658, "dur": 0.896, + "args": { + "External id": 295952,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376477.018, "dur": 0.781, + "args": { + "External id": 295953,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376488.032, "dur": 0.734, + "args": { + "External id": 295954,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376498.964, "dur": 3.226, + "args": { + "External id": 295955,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376511.595, "dur": 0.616, + "args": { + "External id": 295956,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368376598.066, "dur": 2616.825, + "args": { + "External id": 295957,"Record function id": 0, "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2070554, "tid": 2107619, + "ts": 5333368376617.160, "dur": 967.046, + "args": { + "External id": 295958,"Record function id": 0, "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070554, "tid": 2107619, + "ts": 5333368376668.764, "dur": 301.180, + "args": { + "External id": 295959,"Record function id": 0, "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376747.190, "dur": 4.397, + "args": { + "External id": 295960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376755.135, "dur": 0.684, + "args": { + "External id": 295961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376757.815, "dur": 0.781, + "args": { + "External id": 295962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376760.291, "dur": 0.675, + "args": { + "External id": 295963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376762.418, "dur": 2.174, + "args": { + "External id": 295964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376766.195, "dur": 0.468, + "args": { + "External id": 295965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376768.370, "dur": 1.371, + "args": { + "External id": 295966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376771.388, "dur": 0.538, + "args": { + "External id": 295967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376773.616, "dur": 0.625, + "args": { + "External id": 295968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368376776.049, "dur": 0.571, + "args": { + "External id": 295969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368376797.490, "dur": 145.445, + "args": { + "External id": 295970,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368376813.003, "dur": 125.510, + "args": { + "External id": 295971,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368376825.266, "dur": 12.015, + "args": { + "External id": 295972,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368376840.661, "dur": 70.033, + "args": { + "External id": 295973,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368376843.021, "dur": 67.325, + "args": { + "External id": 295974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368376846.523, "dur": 5.641, + "args": { + "External id": 295975,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368376853.605, "dur": 56.147, + "args": { + "External id": 295976,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2070554, "tid": 2107619, + "ts": 5333368377045.538, "dur": 531.211, + "args": { + "External id": 295977,"Record function id": 0, "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070554, "tid": 2107619, + "ts": 5333368377060.823, "dur": 504.163, + "args": { + "External id": 295978,"Record function id": 0, "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368377115.190, "dur": 5.769, + "args": { + "External id": 295979,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368377136.309, "dur": 26.795, + "args": { + "External id": 295980,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377144.146, "dur": 1.430, + "args": { + "External id": 295981,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377146.899, "dur": 0.353, + "args": { + "External id": 295982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377147.970, "dur": 0.342, + "args": { + "External id": 295983,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377150.019, "dur": 0.372, + "args": { + "External id": 295984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377151.823, "dur": 0.388, + "args": { + "External id": 295985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377153.631, "dur": 0.256, + "args": { + "External id": 295986,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377155.063, "dur": 0.227, + "args": { + "External id": 295987,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377156.538, "dur": 2.118, + "args": { + "External id": 295988,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377159.657, "dur": 0.395, + "args": { + "External id": 295989,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368377189.716, "dur": 34.155, + "args": { + "External id": 295990,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368377259.860, "dur": 96.179, + "args": { + "External id": 295991,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368377269.995, "dur": 4.271, + "args": { + "External id": 295992,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368377280.273, "dur": 10.229, + "args": { + "External id": 295993,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368377284.435, "dur": 5.616, + "args": { + "External id": 295994,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377287.727, "dur": 0.834, + "args": { + "External id": 295995,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368377297.660, "dur": 21.355, + "args": { + "External id": 295996,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377299.202, "dur": 0.292, + "args": { + "External id": 295997,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377303.797, "dur": 0.390, + "args": { + "External id": 295998,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377305.133, "dur": 0.568, + "args": { + "External id": 295999,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377306.942, "dur": 0.475, + "args": { + "External id": 296000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377308.599, "dur": 1.484, + "args": { + "External id": 296001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377311.412, "dur": 0.390, + "args": { + "External id": 296002,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377312.694, "dur": 0.212, + "args": { + "External id": 296003,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377314.140, "dur": 0.385, + "args": { + "External id": 296004,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368377315.639, "dur": 0.511, + "args": { + "External id": 296005,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368377328.900, "dur": 20.030, + "args": { + "External id": 296006,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368377398.222, "dur": 108.331, + "args": { + "External id": 296007,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368377420.387, "dur": 82.888, + "args": { + "External id": 296008,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2631, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368377429.871, "dur": 69.293, + "args": { + "External id": 296009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368377519.466, "dur": 1.619, + "args": { + "External id": 296010,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2633, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368377591.389, "dur": 1602.558, + "args": { + "External id": 296011,"Sequence number": 1209190, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2634 + } + }, + { + "ph": "f", "id": 42, "pid": 2070554, "tid": 2107619, "ts": 5333368377591.389, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368377746.232, "dur": 106.055, + "args": { + "External id": 296012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368377893.014, "dur": 37.755, + "args": { + "External id": 296013,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368377946.698, "dur": 52.394, + "args": { + "External id": 296014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378008.611, "dur": 36.004, + "args": { + "External id": 296015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378050.690, "dur": 46.406, + "args": { + "External id": 296016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378104.433, "dur": 28.376, + "args": { + "External id": 296017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378140.013, "dur": 56.413, + "args": { + "External id": 296018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368378222.108, "dur": 25.070, + "args": { + "External id": 296019,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368378265.168, "dur": 27.369, + "args": { + "External id": 296020,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368378310.386, "dur": 20.175, + "args": { + "External id": 296021,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368378342.353, "dur": 15.576, + "args": { + "External id": 296022,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378367.158, "dur": 33.559, + "args": { + "External id": 296023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378403.646, "dur": 33.887, + "args": { + "External id": 296024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368378463.391, "dur": 198.288, + "args": { + "External id": 296025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368378535.230, "dur": 6.244, + "args": { + "External id": 296026,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368378543.580, "dur": 2.333, + "args": { + "External id": 296027,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368378695.693, "dur": 27.528, + "args": { + "External id": 296028,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368378733.542, "dur": 14.593, + "args": { + "External id": 296029,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378758.584, "dur": 39.812, + "args": { + "External id": 296030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378803.939, "dur": 35.757, + "args": { + "External id": 296031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378846.228, "dur": 19.484, + "args": { + "External id": 296032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378869.605, "dur": 28.553, + "args": { + "External id": 296033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378904.955, "dur": 21.315, + "args": { + "External id": 296034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368378931.590, "dur": 30.322, + "args": { + "External id": 296035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368378978.455, "dur": 21.086, + "args": { + "External id": 296036,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368379014.836, "dur": 24.185, + "args": { + "External id": 296037,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368379052.141, "dur": 28.313, + "args": { + "External id": 296038,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368379102.854, "dur": 19.459, + "args": { + "External id": 296039,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368379133.338, "dur": 15.415, + "args": { + "External id": 296040,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379238.605, "dur": 17.250, + "args": { + "External id": 296041,"Record function id": 0, "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379244.620, "dur": 10.289, + "args": { + "External id": 296042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379248.660, "dur": 5.240, + "args": { + "External id": 296043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379249.878, "dur": 3.933, + "args": { + "External id": 296044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379259.579, "dur": 3.803, + "args": { + "External id": 296045,"Record function id": 0, "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379260.674, "dur": 2.263, + "args": { + "External id": 296046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379261.273, "dur": 1.213, + "args": { + "External id": 296047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379261.610, "dur": 0.769, + "args": { + "External id": 296048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379266.591, "dur": 5.612, + "args": { + "External id": 296049,"Record function id": 0, "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379267.862, "dur": 3.886, + "args": { + "External id": 296050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379268.391, "dur": 2.933, + "args": { + "External id": 296051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379268.806, "dur": 2.432, + "args": { + "External id": 296052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379275.513, "dur": 3.952, + "args": { + "External id": 296053,"Record function id": 0, "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379276.805, "dur": 2.206, + "args": { + "External id": 296054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379277.353, "dur": 1.241, + "args": { + "External id": 296055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379277.663, "dur": 0.846, + "args": { + "External id": 296056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379282.551, "dur": 3.775, + "args": { + "External id": 296057,"Record function id": 0, "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379283.686, "dur": 2.219, + "args": { + "External id": 296058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379284.189, "dur": 1.313, + "args": { + "External id": 296059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379284.865, "dur": 0.564, + "args": { + "External id": 296060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379289.497, "dur": 4.010, + "args": { + "External id": 296061,"Record function id": 0, "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379290.709, "dur": 2.350, + "args": { + "External id": 296062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379291.211, "dur": 1.439, + "args": { + "External id": 296063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379291.904, "dur": 0.678, + "args": { + "External id": 296064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379296.653, "dur": 3.592, + "args": { + "External id": 296065,"Record function id": 0, "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379297.652, "dur": 2.162, + "args": { + "External id": 296066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379298.161, "dur": 1.235, + "args": { + "External id": 296067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379298.662, "dur": 0.627, + "args": { + "External id": 296068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379303.323, "dur": 3.793, + "args": { + "External id": 296069,"Record function id": 0, "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379304.368, "dur": 2.331, + "args": { + "External id": 296070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379304.833, "dur": 1.460, + "args": { + "External id": 296071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379305.539, "dur": 0.682, + "args": { + "External id": 296072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379310.169, "dur": 3.791, + "args": { + "External id": 296073,"Record function id": 0, "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368379311.292, "dur": 2.245, + "args": { + "External id": 296074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379311.766, "dur": 1.341, + "args": { + "External id": 296075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368379312.373, "dur": 0.638, + "args": { + "External id": 296076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368379318.268, "dur": 36909.664, + "args": { + "External id": 296077,"Record function id": 0, "Sequence number": 1209189, "Fwd thread id": 1, "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368379319.548, "dur": 36898.618, + "args": { + "External id": 296078,"Sequence number": 1209189, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2701 + } + }, + { + "ph": "f", "id": 43, "pid": 2070554, "tid": 2107619, "ts": 5333368379319.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2070554, "tid": 2107619, + "ts": 5333368379348.844, "dur": 38.217, + "args": { + "External id": 296079,"Record function id": 0, "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2070554, "tid": 2107619, + "ts": 5333368379394.340, "dur": 58.945, + "args": { + "External id": 296080,"Record function id": 0, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2070554, "tid": 2107619, + "ts": 5333368379458.678, "dur": 36750.456, + "args": { + "External id": 296081,"Record function id": 0, "Ev Idx": 2704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368379542.857, "dur": 6.496, + "args": { + "External id": 296082,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368379558.386, "dur": 4.627, + "args": { + "External id": 296083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368379576.612, "dur": 35811.538, + "args": { + "External id": 296084,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368379594.552, "dur": 35784.683, + "args": { + "External id": 296085,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368379673.064, "dur": 15.906, + "args": { + "External id": 296086,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368379694.983, "dur": 35643.698, + "args": { + "External id": 296087,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368379697.195, "dur": 35640.852, + "args": { + "External id": 296088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368379701.458, "dur": 5.740, + "args": { + "External id": 296089,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368379708.792, "dur": 35626.232, + "args": { + "External id": 296090,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368415472.838, "dur": 8.148, + "args": { + "External id": 296091,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368415475.158, "dur": 5.417, + "args": { + "External id": 296092,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368415507.674, "dur": 394.217, + "args": { + "External id": 296093,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368415532.417, "dur": 364.039, + "args": { + "External id": 296094,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2717, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368415542.982, "dur": 348.093, + "args": { + "External id": 296095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368415922.060, "dur": 2.369, + "args": { + "External id": 296096,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2719, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368415988.956, "dur": 6.508, + "args": { + "External id": 296097,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416040.891, "dur": 1.498, + "args": { + "External id": 296098,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416056.652, "dur": 1.066, + "args": { + "External id": 296099,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416067.930, "dur": 2.380, + "args": { + "External id": 296100,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416083.695, "dur": 0.891, + "args": { + "External id": 296101,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416094.233, "dur": 0.747, + "args": { + "External id": 296102,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416105.221, "dur": 0.564, + "args": { + "External id": 296103,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416116.088, "dur": 2.960, + "args": { + "External id": 296104,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416128.798, "dur": 0.828, + "args": { + "External id": 296105,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368416243.656, "dur": 2644.459, + "args": { + "External id": 296106,"Record function id": 0, "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2070554, "tid": 2107619, + "ts": 5333368416263.753, "dur": 978.559, + "args": { + "External id": 296107,"Record function id": 0, "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070554, "tid": 2107619, + "ts": 5333368416278.362, "dur": 302.395, + "args": { + "External id": 296108,"Record function id": 0, "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416360.570, "dur": 4.393, + "args": { + "External id": 296109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416368.435, "dur": 0.745, + "args": { + "External id": 296110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416371.444, "dur": 0.585, + "args": { + "External id": 296111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416373.942, "dur": 0.674, + "args": { + "External id": 296112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416376.348, "dur": 0.462, + "args": { + "External id": 296113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416381.995, "dur": 1.984, + "args": { + "External id": 296114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416385.815, "dur": 1.519, + "args": { + "External id": 296115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416388.741, "dur": 0.445, + "args": { + "External id": 296116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416390.826, "dur": 0.603, + "args": { + "External id": 296117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368416393.199, "dur": 0.627, + "args": { + "External id": 296118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368416412.671, "dur": 141.395, + "args": { + "External id": 296119,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368416428.033, "dur": 122.205, + "args": { + "External id": 296120,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368416440.652, "dur": 12.171, + "args": { + "External id": 296121,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368416456.510, "dur": 65.397, + "args": { + "External id": 296122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368416458.840, "dur": 62.762, + "args": { + "External id": 296123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416462.185, "dur": 5.552, + "args": { + "External id": 296124,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368416469.285, "dur": 51.777, + "args": { + "External id": 296125,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2070554, "tid": 2107619, + "ts": 5333368416698.789, "dur": 536.751, + "args": { + "External id": 296126,"Record function id": 0, "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070554, "tid": 2107619, + "ts": 5333368416714.586, "dur": 508.837, + "args": { + "External id": 296127,"Record function id": 0, "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368416772.020, "dur": 5.104, + "args": { + "External id": 296128,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368416792.653, "dur": 26.288, + "args": { + "External id": 296129,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416797.336, "dur": 1.366, + "args": { + "External id": 296130,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416800.422, "dur": 1.880, + "args": { + "External id": 296131,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416803.495, "dur": 0.413, + "args": { + "External id": 296132,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416805.319, "dur": 0.417, + "args": { + "External id": 296133,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416807.846, "dur": 0.838, + "args": { + "External id": 296134,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416809.800, "dur": 0.700, + "args": { + "External id": 296135,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416811.813, "dur": 0.415, + "args": { + "External id": 296136,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416813.282, "dur": 0.386, + "args": { + "External id": 296137,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416814.950, "dur": 0.354, + "args": { + "External id": 296138,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368416828.984, "dur": 36.401, + "args": { + "External id": 296139,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368416896.679, "dur": 93.055, + "args": { + "External id": 296140,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368416909.505, "dur": 4.541, + "args": { + "External id": 296141,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368416918.768, "dur": 9.119, + "args": { + "External id": 296142,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368416922.786, "dur": 4.658, + "args": { + "External id": 296143,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416925.697, "dur": 0.502, + "args": { + "External id": 296144,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368416934.296, "dur": 19.828, + "args": { + "External id": 296145,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416935.870, "dur": 0.364, + "args": { + "External id": 296146,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416937.274, "dur": 0.292, + "args": { + "External id": 296147,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416938.921, "dur": 0.439, + "args": { + "External id": 296148,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416940.619, "dur": 0.582, + "args": { + "External id": 296149,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416942.784, "dur": 0.597, + "args": { + "External id": 296150,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416944.376, "dur": 0.290, + "args": { + "External id": 296151,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416946.015, "dur": 2.116, + "args": { + "External id": 296152,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416949.181, "dur": 0.281, + "args": { + "External id": 296153,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368416951.217, "dur": 0.289, + "args": { + "External id": 296154,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368416963.628, "dur": 19.140, + "args": { + "External id": 296155,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368417032.900, "dur": 109.690, + "args": { + "External id": 296156,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368417055.202, "dur": 84.197, + "args": { + "External id": 296157,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2780, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368417064.615, "dur": 70.608, + "args": { + "External id": 296158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368417154.913, "dur": 1.707, + "args": { + "External id": 296159,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2782, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368417250.372, "dur": 1613.908, + "args": { + "External id": 296160,"Sequence number": 1209188, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2783 + } + }, + { + "ph": "f", "id": 44, "pid": 2070554, "tid": 2107619, "ts": 5333368417250.372, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368417355.633, "dur": 113.593, + "args": { + "External id": 296161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368417506.022, "dur": 41.525, + "args": { + "External id": 296162,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368417562.446, "dur": 53.269, + "args": { + "External id": 296163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368417669.262, "dur": 39.858, + "args": { + "External id": 296164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368417716.365, "dur": 48.586, + "args": { + "External id": 296165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368417771.821, "dur": 28.526, + "args": { + "External id": 296166,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368417807.800, "dur": 44.947, + "args": { + "External id": 296167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368417876.933, "dur": 24.813, + "args": { + "External id": 296168,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368417921.557, "dur": 28.455, + "args": { + "External id": 296169,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368417968.819, "dur": 20.175, + "args": { + "External id": 296170,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368418001.067, "dur": 15.919, + "args": { + "External id": 296171,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418023.567, "dur": 30.163, + "args": { + "External id": 296172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418056.953, "dur": 32.458, + "args": { + "External id": 296173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368418117.984, "dur": 180.789, + "args": { + "External id": 296174,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368418206.483, "dur": 6.624, + "args": { + "External id": 296175,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368418215.276, "dur": 2.838, + "args": { + "External id": 296176,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368418329.058, "dur": 27.991, + "args": { + "External id": 296177,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368418369.842, "dur": 13.863, + "args": { + "External id": 296178,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418391.827, "dur": 42.656, + "args": { + "External id": 296179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418439.863, "dur": 36.601, + "args": { + "External id": 296180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418483.872, "dur": 20.883, + "args": { + "External id": 296181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418509.032, "dur": 30.650, + "args": { + "External id": 296182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418544.760, "dur": 18.453, + "args": { + "External id": 296183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368418568.520, "dur": 31.824, + "args": { + "External id": 296184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368418616.093, "dur": 56.948, + "args": { + "External id": 296185,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368418693.228, "dur": 40.269, + "args": { + "External id": 296186,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368418754.583, "dur": 18.989, + "args": { + "External id": 296187,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368418789.283, "dur": 15.354, + "args": { + "External id": 296188,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368418820.174, "dur": 15.272, + "args": { + "External id": 296189,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418909.983, "dur": 13.454, + "args": { + "External id": 296190,"Record function id": 0, "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418912.843, "dur": 9.643, + "args": { + "External id": 296191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418916.563, "dur": 5.017, + "args": { + "External id": 296192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418917.741, "dur": 3.721, + "args": { + "External id": 296193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418927.110, "dur": 4.360, + "args": { + "External id": 296194,"Record function id": 0, "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418928.379, "dur": 2.551, + "args": { + "External id": 296195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418929.035, "dur": 1.455, + "args": { + "External id": 296196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418929.570, "dur": 0.811, + "args": { + "External id": 296197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418934.779, "dur": 3.480, + "args": { + "External id": 296198,"Record function id": 0, "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418935.740, "dur": 2.049, + "args": { + "External id": 296199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418936.262, "dur": 1.126, + "args": { + "External id": 296200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418936.844, "dur": 0.459, + "args": { + "External id": 296201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418941.495, "dur": 3.861, + "args": { + "External id": 296202,"Record function id": 0, "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418942.699, "dur": 2.181, + "args": { + "External id": 296203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418943.442, "dur": 1.014, + "args": { + "External id": 296204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418943.724, "dur": 0.656, + "args": { + "External id": 296205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418948.430, "dur": 4.147, + "args": { + "External id": 296206,"Record function id": 0, "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418949.852, "dur": 2.232, + "args": { + "External id": 296207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418950.346, "dur": 1.299, + "args": { + "External id": 296208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418951.064, "dur": 0.509, + "args": { + "External id": 296209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418955.684, "dur": 3.796, + "args": { + "External id": 296210,"Record function id": 0, "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418956.841, "dur": 2.204, + "args": { + "External id": 296211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418957.546, "dur": 1.070, + "args": { + "External id": 296212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418957.981, "dur": 0.571, + "args": { + "External id": 296213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418962.604, "dur": 4.960, + "args": { + "External id": 296214,"Record function id": 0, "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418963.999, "dur": 3.111, + "args": { + "External id": 296215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418964.504, "dur": 2.209, + "args": { + "External id": 296216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418965.028, "dur": 1.612, + "args": { + "External id": 296217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418970.629, "dur": 3.282, + "args": { + "External id": 296218,"Record function id": 0, "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418971.684, "dur": 1.764, + "args": { + "External id": 296219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418972.150, "dur": 0.892, + "args": { + "External id": 296220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418972.480, "dur": 0.491, + "args": { + "External id": 296221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418977.015, "dur": 3.244, + "args": { + "External id": 296222,"Record function id": 0, "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368418978.089, "dur": 1.738, + "args": { + "External id": 296223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418978.592, "dur": 0.842, + "args": { + "External id": 296224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368418978.916, "dur": 0.444, + "args": { + "External id": 296225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368418984.052, "dur": 37243.080, + "args": { + "External id": 296226,"Record function id": 0, "Sequence number": 1209187, "Fwd thread id": 1, "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368418985.161, "dur": 37233.003, + "args": { + "External id": 296227,"Sequence number": 1209187, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2850 + } + }, + { + "ph": "f", "id": 45, "pid": 2070554, "tid": 2107619, "ts": 5333368418985.161, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2070554, "tid": 2107619, + "ts": 5333368419013.668, "dur": 37.333, + "args": { + "External id": 296228,"Record function id": 0, "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2070554, "tid": 2107619, + "ts": 5333368419058.400, "dur": 59.992, + "args": { + "External id": 296229,"Record function id": 0, "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2070554, "tid": 2107619, + "ts": 5333368419123.616, "dur": 37086.101, + "args": { + "External id": 296230,"Record function id": 0, "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368419227.385, "dur": 7.652, + "args": { + "External id": 296231,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368419245.574, "dur": 4.920, + "args": { + "External id": 296232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368419265.853, "dur": 36149.487, + "args": { + "External id": 296233,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368419281.256, "dur": 36125.586, + "args": { + "External id": 296234,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368419313.542, "dur": 13.741, + "args": { + "External id": 296235,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368419333.241, "dur": 36037.065, + "args": { + "External id": 296236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368419335.759, "dur": 36034.006, + "args": { + "External id": 296237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368419339.245, "dur": 4.807, + "args": { + "External id": 296238,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368419345.729, "dur": 36020.422, + "args": { + "External id": 296239,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368455502.352, "dur": 8.175, + "args": { + "External id": 296240,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368455504.741, "dur": 5.480, + "args": { + "External id": 296241,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368455541.560, "dur": 387.345, + "args": { + "External id": 296242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368455564.824, "dur": 359.143, + "args": { + "External id": 296243,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2866, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368455575.476, "dur": 342.593, + "args": { + "External id": 296244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368455946.725, "dur": 2.281, + "args": { + "External id": 296245,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2868, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456006.330, "dur": 6.334, + "args": { + "External id": 296246,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456056.117, "dur": 1.134, + "args": { + "External id": 296247,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456071.182, "dur": 1.956, + "args": { + "External id": 296248,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456084.650, "dur": 0.746, + "args": { + "External id": 296249,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456095.527, "dur": 0.782, + "args": { + "External id": 296250,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456105.122, "dur": 0.725, + "args": { + "External id": 296251,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456115.484, "dur": 1.286, + "args": { + "External id": 296252,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456126.624, "dur": 1.596, + "args": { + "External id": 296253,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456137.385, "dur": 0.582, + "args": { + "External id": 296254,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368456242.350, "dur": 2573.766, + "args": { + "External id": 296255,"Record function id": 0, "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2070554, "tid": 2107619, + "ts": 5333368456261.659, "dur": 956.327, + "args": { + "External id": 296256,"Record function id": 0, "Ev Idx": 2879 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070554, "tid": 2107619, + "ts": 5333368456273.661, "dur": 283.146, + "args": { + "External id": 296257,"Record function id": 0, "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456347.927, "dur": 4.582, + "args": { + "External id": 296258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456355.742, "dur": 0.650, + "args": { + "External id": 296259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456357.972, "dur": 0.576, + "args": { + "External id": 296260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456359.933, "dur": 0.563, + "args": { + "External id": 296261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456362.057, "dur": 1.730, + "args": { + "External id": 296262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456365.047, "dur": 0.647, + "args": { + "External id": 296263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456367.149, "dur": 1.542, + "args": { + "External id": 296264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456370.027, "dur": 0.403, + "args": { + "External id": 296265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456371.613, "dur": 0.426, + "args": { + "External id": 296266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368456373.678, "dur": 0.400, + "args": { + "External id": 296267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368456391.832, "dur": 138.568, + "args": { + "External id": 296268,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368456408.608, "dur": 117.977, + "args": { + "External id": 296269,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368456420.672, "dur": 12.155, + "args": { + "External id": 296270,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368456436.213, "dur": 64.082, + "args": { + "External id": 296271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368456438.455, "dur": 61.537, + "args": { + "External id": 296272,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456441.535, "dur": 5.806, + "args": { + "External id": 296273,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368456448.977, "dur": 50.366, + "args": { + "External id": 296274,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2070554, "tid": 2107619, + "ts": 5333368456665.340, "dur": 545.506, + "args": { + "External id": 296275,"Record function id": 0, "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070554, "tid": 2107619, + "ts": 5333368456684.624, "dur": 512.783, + "args": { + "External id": 296276,"Record function id": 0, "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368456744.069, "dur": 6.117, + "args": { + "External id": 296277,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368456765.432, "dur": 22.841, + "args": { + "External id": 296278,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456770.104, "dur": 2.876, + "args": { + "External id": 296279,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456774.472, "dur": 0.264, + "args": { + "External id": 296280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456775.904, "dur": 0.173, + "args": { + "External id": 296281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456777.250, "dur": 0.234, + "args": { + "External id": 296282,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456778.435, "dur": 0.310, + "args": { + "External id": 296283,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456779.935, "dur": 0.363, + "args": { + "External id": 296284,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456781.364, "dur": 0.168, + "args": { + "External id": 296285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456782.363, "dur": 0.165, + "args": { + "External id": 296286,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456783.204, "dur": 1.341, + "args": { + "External id": 296287,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368456798.194, "dur": 35.974, + "args": { + "External id": 296288,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368456867.412, "dur": 86.016, + "args": { + "External id": 296289,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368456877.465, "dur": 2.924, + "args": { + "External id": 296290,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368456885.244, "dur": 9.271, + "args": { + "External id": 296291,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368456889.285, "dur": 4.812, + "args": { + "External id": 296292,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456892.271, "dur": 0.584, + "args": { + "External id": 296293,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368456900.593, "dur": 18.154, + "args": { + "External id": 296294,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456901.803, "dur": 0.315, + "args": { + "External id": 296295,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456903.683, "dur": 0.200, + "args": { + "External id": 296296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456906.836, "dur": 0.210, + "args": { + "External id": 296297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456908.053, "dur": 0.383, + "args": { + "External id": 296298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456909.437, "dur": 0.208, + "args": { + "External id": 296299,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456911.289, "dur": 1.439, + "args": { + "External id": 296300,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456913.653, "dur": 0.183, + "args": { + "External id": 296301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456914.742, "dur": 0.237, + "args": { + "External id": 296302,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368456915.912, "dur": 0.450, + "args": { + "External id": 296303,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368456927.790, "dur": 18.440, + "args": { + "External id": 296304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368456999.088, "dur": 111.383, + "args": { + "External id": 296305,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368457020.295, "dur": 86.702, + "args": { + "External id": 296306,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2929, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368457030.008, "dur": 72.655, + "args": { + "External id": 296307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368457126.493, "dur": 1.696, + "args": { + "External id": 296308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2931, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368457224.901, "dur": 1571.216, + "args": { + "External id": 296309,"Sequence number": 1209186, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2932 + } + }, + { + "ph": "f", "id": 46, "pid": 2070554, "tid": 2107619, "ts": 5333368457224.901, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368457334.691, "dur": 110.049, + "args": { + "External id": 296310,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368457479.470, "dur": 38.741, + "args": { + "External id": 296311,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368457534.508, "dur": 51.889, + "args": { + "External id": 296312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368457595.571, "dur": 75.207, + "args": { + "External id": 296313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368457681.079, "dur": 49.733, + "args": { + "External id": 296314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368457737.820, "dur": 29.267, + "args": { + "External id": 296315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368457775.271, "dur": 41.810, + "args": { + "External id": 296316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368457840.873, "dur": 22.853, + "args": { + "External id": 296317,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368457880.300, "dur": 27.217, + "args": { + "External id": 296318,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368457924.918, "dur": 16.849, + "args": { + "External id": 296319,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368457952.420, "dur": 14.985, + "args": { + "External id": 296320,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368457974.139, "dur": 28.663, + "args": { + "External id": 296321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368458005.830, "dur": 32.202, + "args": { + "External id": 296322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368458064.662, "dur": 177.331, + "args": { + "External id": 296323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368458136.052, "dur": 5.397, + "args": { + "External id": 296324,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368458143.249, "dur": 2.480, + "args": { + "External id": 296325,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368458271.561, "dur": 25.831, + "args": { + "External id": 296326,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368458308.204, "dur": 14.377, + "args": { + "External id": 296327,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368458330.805, "dur": 39.038, + "args": { + "External id": 296328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368458379.676, "dur": 35.442, + "args": { + "External id": 296329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368458420.974, "dur": 24.684, + "args": { + "External id": 296330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368458449.694, "dur": 29.532, + "args": { + "External id": 296331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368458484.618, "dur": 21.234, + "args": { + "External id": 296332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368458512.276, "dur": 29.198, + "args": { + "External id": 296333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368458556.407, "dur": 22.067, + "args": { + "External id": 296334,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368458593.199, "dur": 24.099, + "args": { + "External id": 296335,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368458666.911, "dur": 19.426, + "args": { + "External id": 296336,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368458700.540, "dur": 29.203, + "args": { + "External id": 296337,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368458749.631, "dur": 19.099, + "args": { + "External id": 296338,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458838.988, "dur": 13.938, + "args": { + "External id": 296339,"Record function id": 0, "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458842.036, "dur": 9.903, + "args": { + "External id": 296340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458845.970, "dur": 5.069, + "args": { + "External id": 296341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458847.235, "dur": 3.694, + "args": { + "External id": 296342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458856.605, "dur": 4.056, + "args": { + "External id": 296343,"Record function id": 0, "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458857.681, "dur": 2.551, + "args": { + "External id": 296344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458858.539, "dur": 1.230, + "args": { + "External id": 296345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458858.867, "dur": 0.838, + "args": { + "External id": 296346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458863.972, "dur": 3.658, + "args": { + "External id": 296347,"Record function id": 0, "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458864.959, "dur": 2.211, + "args": { + "External id": 296348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458865.517, "dur": 1.263, + "args": { + "External id": 296349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458865.981, "dur": 0.709, + "args": { + "External id": 296350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458870.794, "dur": 6.894, + "args": { + "External id": 296351,"Record function id": 0, "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458875.289, "dur": 1.971, + "args": { + "External id": 296352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458876.019, "dur": 0.843, + "args": { + "External id": 296353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458876.310, "dur": 0.488, + "args": { + "External id": 296354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458880.812, "dur": 4.976, + "args": { + "External id": 296355,"Record function id": 0, "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458882.189, "dur": 3.107, + "args": { + "External id": 296356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458882.732, "dur": 2.167, + "args": { + "External id": 296357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458883.177, "dur": 1.653, + "args": { + "External id": 296358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458889.037, "dur": 4.219, + "args": { + "External id": 296359,"Record function id": 0, "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458890.412, "dur": 2.402, + "args": { + "External id": 296360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458891.192, "dur": 1.242, + "args": { + "External id": 296361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458891.596, "dur": 0.771, + "args": { + "External id": 296362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458896.505, "dur": 3.858, + "args": { + "External id": 296363,"Record function id": 0, "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458897.572, "dur": 2.341, + "args": { + "External id": 296364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458898.307, "dur": 1.223, + "args": { + "External id": 296365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458898.820, "dur": 0.645, + "args": { + "External id": 296366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458903.444, "dur": 3.168, + "args": { + "External id": 296367,"Record function id": 0, "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458904.421, "dur": 1.764, + "args": { + "External id": 296368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458904.954, "dur": 0.833, + "args": { + "External id": 296369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458905.254, "dur": 0.459, + "args": { + "External id": 296370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458909.796, "dur": 3.150, + "args": { + "External id": 296371,"Record function id": 0, "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368458910.713, "dur": 1.789, + "args": { + "External id": 296372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458911.198, "dur": 0.900, + "args": { + "External id": 296373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368458911.618, "dur": 0.406, + "args": { + "External id": 296374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368458916.727, "dur": 37981.036, + "args": { + "External id": 296375,"Record function id": 0, "Sequence number": 1209185, "Fwd thread id": 1, "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368458917.766, "dur": 37970.967, + "args": { + "External id": 296376,"Sequence number": 1209185, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2999 + } + }, + { + "ph": "f", "id": 47, "pid": 2070554, "tid": 2107619, "ts": 5333368458917.766, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2070554, "tid": 2107619, + "ts": 5333368458949.336, "dur": 36.397, + "args": { + "External id": 296377,"Record function id": 0, "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2070554, "tid": 2107619, + "ts": 5333368458992.686, "dur": 59.889, + "args": { + "External id": 296378,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2070554, "tid": 2107619, + "ts": 5333368459058.201, "dur": 37819.865, + "args": { + "External id": 296379,"Record function id": 0, "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368459142.055, "dur": 6.542, + "args": { + "External id": 296380,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368459157.788, "dur": 4.792, + "args": { + "External id": 296381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368459194.246, "dur": 36874.642, + "args": { + "External id": 296382,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368459207.680, "dur": 36853.279, + "args": { + "External id": 296383,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368459241.746, "dur": 13.845, + "args": { + "External id": 296384,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368459261.755, "dur": 36761.353, + "args": { + "External id": 296385,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368459264.039, "dur": 36758.492, + "args": { + "External id": 296386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368459267.749, "dur": 5.695, + "args": { + "External id": 296387,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368459275.205, "dur": 36743.853, + "args": { + "External id": 296388,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368496153.505, "dur": 8.214, + "args": { + "External id": 296389,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368496155.866, "dur": 5.588, + "args": { + "External id": 296390,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368496200.398, "dur": 354.229, + "args": { + "External id": 296391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368496224.227, "dur": 322.713, + "args": { + "External id": 296392,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3015, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368496235.374, "dur": 306.388, + "args": { + "External id": 296393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368496570.865, "dur": 2.166, + "args": { + "External id": 296394,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3017, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496677.472, "dur": 7.005, + "args": { + "External id": 296395,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496735.145, "dur": 1.205, + "args": { + "External id": 296396,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496752.364, "dur": 0.910, + "args": { + "External id": 296397,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496764.027, "dur": 0.711, + "args": { + "External id": 296398,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496774.289, "dur": 0.834, + "args": { + "External id": 296399,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496783.619, "dur": 1.157, + "args": { + "External id": 296400,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496794.558, "dur": 0.778, + "args": { + "External id": 296401,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496804.683, "dur": 1.613, + "args": { + "External id": 296402,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368496814.993, "dur": 0.784, + "args": { + "External id": 296403,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368496912.943, "dur": 2597.091, + "args": { + "External id": 296404,"Record function id": 0, "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2070554, "tid": 2107619, + "ts": 5333368496932.201, "dur": 967.405, + "args": { + "External id": 296405,"Record function id": 0, "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070554, "tid": 2107619, + "ts": 5333368496946.339, "dur": 308.659, + "args": { + "External id": 296406,"Record function id": 0, "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497019.368, "dur": 3.717, + "args": { + "External id": 296407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497025.969, "dur": 0.578, + "args": { + "External id": 296408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497028.499, "dur": 0.614, + "args": { + "External id": 296409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497030.661, "dur": 1.628, + "args": { + "External id": 296410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497034.075, "dur": 0.463, + "args": { + "External id": 296411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497035.663, "dur": 0.606, + "args": { + "External id": 296412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497037.768, "dur": 0.887, + "args": { + "External id": 296413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497039.869, "dur": 0.422, + "args": { + "External id": 296414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497041.662, "dur": 0.472, + "args": { + "External id": 296415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368497043.450, "dur": 0.438, + "args": { + "External id": 296416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368497061.206, "dur": 162.955, + "args": { + "External id": 296417,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368497076.734, "dur": 142.946, + "args": { + "External id": 296418,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368497088.711, "dur": 12.351, + "args": { + "External id": 296419,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368497105.142, "dur": 84.410, + "args": { + "External id": 296420,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368497107.541, "dur": 81.696, + "args": { + "External id": 296421,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497110.928, "dur": 6.489, + "args": { + "External id": 296422,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368497118.946, "dur": 69.206, + "args": { + "External id": 296423,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2070554, "tid": 2107619, + "ts": 5333368497330.142, "dur": 561.987, + "args": { + "External id": 296424,"Record function id": 0, "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070554, "tid": 2107619, + "ts": 5333368497347.393, "dur": 531.814, + "args": { + "External id": 296425,"Record function id": 0, "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368497402.170, "dur": 5.208, + "args": { + "External id": 296426,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368497422.549, "dur": 23.313, + "args": { + "External id": 296427,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497426.914, "dur": 1.428, + "args": { + "External id": 296428,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497429.929, "dur": 0.267, + "args": { + "External id": 296429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497431.296, "dur": 0.213, + "args": { + "External id": 296430,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497432.732, "dur": 0.677, + "args": { + "External id": 296431,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497437.472, "dur": 0.202, + "args": { + "External id": 296432,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497438.679, "dur": 0.177, + "args": { + "External id": 296433,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497439.905, "dur": 1.097, + "args": { + "External id": 296434,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497441.804, "dur": 0.202, + "args": { + "External id": 296435,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497442.952, "dur": 0.236, + "args": { + "External id": 296436,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368497455.123, "dur": 28.136, + "args": { + "External id": 296437,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368497516.224, "dur": 94.839, + "args": { + "External id": 296438,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368497526.068, "dur": 2.862, + "args": { + "External id": 296439,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368497533.618, "dur": 9.327, + "args": { + "External id": 296440,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368497537.492, "dur": 4.994, + "args": { + "External id": 296441,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497540.714, "dur": 0.446, + "args": { + "External id": 296442,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368497549.235, "dur": 26.029, + "args": { + "External id": 296443,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497550.687, "dur": 0.271, + "args": { + "External id": 296444,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497551.949, "dur": 0.218, + "args": { + "External id": 296445,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497553.427, "dur": 0.224, + "args": { + "External id": 296446,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497554.848, "dur": 4.111, + "args": { + "External id": 296447,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497559.773, "dur": 0.402, + "args": { + "External id": 296448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497567.953, "dur": 0.376, + "args": { + "External id": 296449,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497569.219, "dur": 0.361, + "args": { + "External id": 296450,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497570.433, "dur": 0.257, + "args": { + "External id": 296451,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368497571.375, "dur": 0.312, + "args": { + "External id": 296452,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368497584.002, "dur": 20.246, + "args": { + "External id": 296453,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368497700.314, "dur": 115.153, + "args": { + "External id": 296454,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368497722.263, "dur": 89.789, + "args": { + "External id": 296455,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3078, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368497731.495, "dur": 76.373, + "args": { + "External id": 296456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368497827.941, "dur": 1.801, + "args": { + "External id": 296457,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3080, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368497906.032, "dur": 1584.630, + "args": { + "External id": 296458,"Sequence number": 1209184, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3081 + } + }, + { + "ph": "f", "id": 48, "pid": 2070554, "tid": 2107619, "ts": 5333368497906.032, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498012.085, "dur": 107.477, + "args": { + "External id": 296459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368498156.789, "dur": 60.432, + "args": { + "External id": 296460,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498235.902, "dur": 58.859, + "args": { + "External id": 296461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498304.713, "dur": 37.427, + "args": { + "External id": 296462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498348.424, "dur": 46.761, + "args": { + "External id": 296463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498403.181, "dur": 28.163, + "args": { + "External id": 296464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498438.513, "dur": 43.491, + "args": { + "External id": 296465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368498504.256, "dur": 21.985, + "args": { + "External id": 296466,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368498545.253, "dur": 26.628, + "args": { + "External id": 296467,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368498590.438, "dur": 18.055, + "args": { + "External id": 296468,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368498652.218, "dur": 18.721, + "args": { + "External id": 296469,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498681.640, "dur": 34.494, + "args": { + "External id": 296470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368498719.572, "dur": 34.913, + "args": { + "External id": 296471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368498783.066, "dur": 164.617, + "args": { + "External id": 296472,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368498859.095, "dur": 6.205, + "args": { + "External id": 296473,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368498867.078, "dur": 3.268, + "args": { + "External id": 296474,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368498978.368, "dur": 25.506, + "args": { + "External id": 296475,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368499014.283, "dur": 14.648, + "args": { + "External id": 296476,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368499036.735, "dur": 35.465, + "args": { + "External id": 296477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368499077.518, "dur": 35.215, + "args": { + "External id": 296478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368499118.952, "dur": 21.371, + "args": { + "External id": 296479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368499145.089, "dur": 47.424, + "args": { + "External id": 296480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368499201.987, "dur": 28.090, + "args": { + "External id": 296481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368499236.791, "dur": 29.884, + "args": { + "External id": 296482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368499283.903, "dur": 22.711, + "args": { + "External id": 296483,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368499323.352, "dur": 37.195, + "args": { + "External id": 296484,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368499382.063, "dur": 20.750, + "args": { + "External id": 296485,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368499418.757, "dur": 13.819, + "args": { + "External id": 296486,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368499443.905, "dur": 18.557, + "args": { + "External id": 296487,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499535.416, "dur": 13.998, + "args": { + "External id": 296488,"Record function id": 0, "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499538.430, "dur": 9.955, + "args": { + "External id": 296489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499542.251, "dur": 5.295, + "args": { + "External id": 296490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499543.413, "dur": 4.039, + "args": { + "External id": 296491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499553.036, "dur": 4.632, + "args": { + "External id": 296492,"Record function id": 0, "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499554.437, "dur": 2.781, + "args": { + "External id": 296493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499555.032, "dur": 1.728, + "args": { + "External id": 296494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499555.390, "dur": 1.253, + "args": { + "External id": 296495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499560.940, "dur": 4.216, + "args": { + "External id": 296496,"Record function id": 0, "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499561.998, "dur": 2.680, + "args": { + "External id": 296497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499562.556, "dur": 1.729, + "args": { + "External id": 296498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499562.966, "dur": 1.231, + "args": { + "External id": 296499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499568.477, "dur": 3.937, + "args": { + "External id": 296500,"Record function id": 0, "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499569.487, "dur": 2.471, + "args": { + "External id": 296501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499570.025, "dur": 1.500, + "args": { + "External id": 296502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499570.336, "dur": 1.121, + "args": { + "External id": 296503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499575.506, "dur": 4.094, + "args": { + "External id": 296504,"Record function id": 0, "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499576.675, "dur": 2.463, + "args": { + "External id": 296505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499577.214, "dur": 1.441, + "args": { + "External id": 296506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499577.683, "dur": 0.910, + "args": { + "External id": 296507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499582.692, "dur": 3.873, + "args": { + "External id": 296508,"Record function id": 0, "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499583.758, "dur": 2.374, + "args": { + "External id": 296509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499584.412, "dur": 1.292, + "args": { + "External id": 296510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499584.748, "dur": 0.886, + "args": { + "External id": 296511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499589.703, "dur": 4.511, + "args": { + "External id": 296512,"Record function id": 0, "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499590.975, "dur": 2.759, + "args": { + "External id": 296513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499591.672, "dur": 1.663, + "args": { + "External id": 296514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499592.258, "dur": 1.007, + "args": { + "External id": 296515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499597.304, "dur": 4.062, + "args": { + "External id": 296516,"Record function id": 0, "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499598.583, "dur": 2.339, + "args": { + "External id": 296517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499599.100, "dur": 1.414, + "args": { + "External id": 296518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499599.708, "dur": 0.736, + "args": { + "External id": 296519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499604.421, "dur": 3.786, + "args": { + "External id": 296520,"Record function id": 0, "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368499605.400, "dur": 2.358, + "args": { + "External id": 296521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499605.906, "dur": 1.469, + "args": { + "External id": 296522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368499606.572, "dur": 0.734, + "args": { + "External id": 296523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368499611.970, "dur": 37367.595, + "args": { + "External id": 296524,"Record function id": 0, "Sequence number": 1209183, "Fwd thread id": 1, "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368499613.323, "dur": 37358.205, + "args": { + "External id": 296525,"Sequence number": 1209183, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3148 + } + }, + { + "ph": "f", "id": 49, "pid": 2070554, "tid": 2107619, "ts": 5333368499613.323, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2070554, "tid": 2107619, + "ts": 5333368499674.033, "dur": 37.851, + "args": { + "External id": 296526,"Record function id": 0, "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2070554, "tid": 2107619, + "ts": 5333368499719.733, "dur": 61.122, + "args": { + "External id": 296527,"Record function id": 0, "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2070554, "tid": 2107619, + "ts": 5333368499787.781, "dur": 37176.737, + "args": { + "External id": 296528,"Record function id": 0, "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368499874.247, "dur": 6.653, + "args": { + "External id": 296529,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368499891.334, "dur": 5.165, + "args": { + "External id": 296530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368499909.625, "dur": 36250.287, + "args": { + "External id": 296531,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368499922.133, "dur": 36229.681, + "args": { + "External id": 296532,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368499964.417, "dur": 13.947, + "args": { + "External id": 296533,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368499984.372, "dur": 36126.438, + "args": { + "External id": 296534,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368499986.694, "dur": 36123.432, + "args": { + "External id": 296535,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368499990.141, "dur": 4.924, + "args": { + "External id": 296536,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368499996.900, "dur": 36110.097, + "args": { + "External id": 296537,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368536258.714, "dur": 8.307, + "args": { + "External id": 296538,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368536261.317, "dur": 5.304, + "args": { + "External id": 296539,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368536293.811, "dur": 385.719, + "args": { + "External id": 296540,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368536316.739, "dur": 357.516, + "args": { + "External id": 296541,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3164, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368536327.654, "dur": 340.100, + "args": { + "External id": 296542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368536701.519, "dur": 2.612, + "args": { + "External id": 296543,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3166, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536767.341, "dur": 6.131, + "args": { + "External id": 296544,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536816.624, "dur": 1.370, + "args": { + "External id": 296545,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536832.277, "dur": 1.189, + "args": { + "External id": 296546,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536845.890, "dur": 0.889, + "args": { + "External id": 296547,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536860.002, "dur": 0.883, + "args": { + "External id": 296548,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536870.378, "dur": 1.008, + "args": { + "External id": 296549,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536881.202, "dur": 0.842, + "args": { + "External id": 296550,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536891.160, "dur": 1.239, + "args": { + "External id": 296551,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368536901.039, "dur": 0.932, + "args": { + "External id": 296552,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368536993.452, "dur": 2613.855, + "args": { + "External id": 296553,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2070554, "tid": 2107619, + "ts": 5333368537012.172, "dur": 991.628, + "args": { + "External id": 296554,"Record function id": 0, "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070554, "tid": 2107619, + "ts": 5333368537025.790, "dur": 319.885, + "args": { + "External id": 296555,"Record function id": 0, "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537099.472, "dur": 4.095, + "args": { + "External id": 296556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537106.438, "dur": 0.647, + "args": { + "External id": 296557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537108.918, "dur": 1.173, + "args": { + "External id": 296558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537111.581, "dur": 1.204, + "args": { + "External id": 296559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537114.361, "dur": 0.989, + "args": { + "External id": 296560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537116.804, "dur": 0.723, + "args": { + "External id": 296561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537119.300, "dur": 0.800, + "args": { + "External id": 296562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537121.417, "dur": 0.708, + "args": { + "External id": 296563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537123.568, "dur": 1.090, + "args": { + "External id": 296564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368537126.443, "dur": 0.585, + "args": { + "External id": 296565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368537144.563, "dur": 169.379, + "args": { + "External id": 296566,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368537163.135, "dur": 146.390, + "args": { + "External id": 296567,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368537191.882, "dur": 12.807, + "args": { + "External id": 296568,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368537211.887, "dur": 67.372, + "args": { + "External id": 296569,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368537214.257, "dur": 64.741, + "args": { + "External id": 296570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537217.538, "dur": 7.295, + "args": { + "External id": 296571,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368537226.278, "dur": 52.285, + "args": { + "External id": 296572,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2070554, "tid": 2107619, + "ts": 5333368537424.905, "dur": 571.459, + "args": { + "External id": 296573,"Record function id": 0, "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070554, "tid": 2107619, + "ts": 5333368537442.391, "dur": 541.381, + "args": { + "External id": 296574,"Record function id": 0, "Ev Idx": 3197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368537496.665, "dur": 4.617, + "args": { + "External id": 296575,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368537516.506, "dur": 26.904, + "args": { + "External id": 296576,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537520.899, "dur": 2.375, + "args": { + "External id": 296577,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537524.751, "dur": 0.667, + "args": { + "External id": 296578,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537527.112, "dur": 0.817, + "args": { + "External id": 296579,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537529.412, "dur": 0.542, + "args": { + "External id": 296580,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537531.456, "dur": 0.495, + "args": { + "External id": 296581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537532.790, "dur": 0.615, + "args": { + "External id": 296582,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537534.535, "dur": 0.575, + "args": { + "External id": 296583,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537535.996, "dur": 0.554, + "args": { + "External id": 296584,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537537.923, "dur": 0.540, + "args": { + "External id": 296585,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368537552.691, "dur": 30.325, + "args": { + "External id": 296586,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368537618.262, "dur": 144.326, + "args": { + "External id": 296587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368537665.753, "dur": 4.859, + "args": { + "External id": 296588,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368537679.132, "dur": 11.095, + "args": { + "External id": 296589,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368537683.281, "dur": 6.510, + "args": { + "External id": 296590,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537687.154, "dur": 0.826, + "args": { + "External id": 296591,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368537698.487, "dur": 20.228, + "args": { + "External id": 296592,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537700.074, "dur": 0.472, + "args": { + "External id": 296593,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537702.138, "dur": 0.608, + "args": { + "External id": 296594,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537703.847, "dur": 0.481, + "args": { + "External id": 296595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537705.432, "dur": 0.579, + "args": { + "External id": 296596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537707.393, "dur": 0.403, + "args": { + "External id": 296597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537709.186, "dur": 0.400, + "args": { + "External id": 296598,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537710.928, "dur": 0.642, + "args": { + "External id": 296599,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537712.737, "dur": 0.440, + "args": { + "External id": 296600,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368537714.279, "dur": 0.650, + "args": { + "External id": 296601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368537728.837, "dur": 25.798, + "args": { + "External id": 296602,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368537810.966, "dur": 111.787, + "args": { + "External id": 296603,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368537834.465, "dur": 84.908, + "args": { + "External id": 296604,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3227, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368537843.810, "dur": 71.226, + "args": { + "External id": 296605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368537935.621, "dur": 1.835, + "args": { + "External id": 296606,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3229, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368538010.223, "dur": 1575.812, + "args": { + "External id": 296607,"Sequence number": 1209182, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3230 + } + }, + { + "ph": "f", "id": 50, "pid": 2070554, "tid": 2107619, "ts": 5333368538010.223, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538119.740, "dur": 121.880, + "args": { + "External id": 296608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368538286.061, "dur": 39.716, + "args": { + "External id": 296609,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538341.464, "dur": 55.901, + "args": { + "External id": 296610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538407.150, "dur": 37.154, + "args": { + "External id": 296611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538450.397, "dur": 47.958, + "args": { + "External id": 296612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538505.228, "dur": 27.438, + "args": { + "External id": 296613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538538.697, "dur": 44.929, + "args": { + "External id": 296614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368538605.793, "dur": 59.910, + "args": { + "External id": 296615,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368538686.131, "dur": 28.853, + "args": { + "External id": 296616,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368538735.285, "dur": 19.062, + "args": { + "External id": 296617,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368538769.707, "dur": 16.039, + "args": { + "External id": 296618,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538793.538, "dur": 36.886, + "args": { + "External id": 296619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368538833.572, "dur": 33.064, + "args": { + "External id": 296620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368538892.628, "dur": 163.212, + "args": { + "External id": 296621,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368538965.949, "dur": 6.250, + "args": { + "External id": 296622,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368538973.951, "dur": 2.704, + "args": { + "External id": 296623,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368539083.877, "dur": 24.989, + "args": { + "External id": 296624,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368539119.488, "dur": 13.797, + "args": { + "External id": 296625,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368539140.358, "dur": 50.970, + "args": { + "External id": 296626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368539200.045, "dur": 38.406, + "args": { + "External id": 296627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368539245.580, "dur": 22.576, + "args": { + "External id": 296628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368539272.410, "dur": 30.134, + "args": { + "External id": 296629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368539307.770, "dur": 21.087, + "args": { + "External id": 296630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368539335.620, "dur": 29.104, + "args": { + "External id": 296631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368539382.704, "dur": 21.643, + "args": { + "External id": 296632,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368539421.419, "dur": 40.485, + "args": { + "External id": 296633,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368539482.158, "dur": 16.973, + "args": { + "External id": 296634,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368539514.467, "dur": 15.251, + "args": { + "External id": 296635,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368539544.005, "dur": 14.929, + "args": { + "External id": 296636,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539661.512, "dur": 16.806, + "args": { + "External id": 296637,"Record function id": 0, "Ev Idx": 3260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539665.721, "dur": 11.184, + "args": { + "External id": 296638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539669.763, "dur": 5.691, + "args": { + "External id": 296639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539670.916, "dur": 4.296, + "args": { + "External id": 296640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539682.735, "dur": 4.308, + "args": { + "External id": 296641,"Record function id": 0, "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539683.995, "dur": 2.610, + "args": { + "External id": 296642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539684.649, "dur": 1.500, + "args": { + "External id": 296643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539684.954, "dur": 0.993, + "args": { + "External id": 296644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539690.176, "dur": 4.153, + "args": { + "External id": 296645,"Record function id": 0, "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539691.470, "dur": 2.419, + "args": { + "External id": 296646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539692.094, "dur": 1.402, + "args": { + "External id": 296647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539692.484, "dur": 0.921, + "args": { + "External id": 296648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539697.522, "dur": 4.263, + "args": { + "External id": 296649,"Record function id": 0, "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539699.064, "dur": 2.262, + "args": { + "External id": 296650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539699.611, "dur": 1.326, + "args": { + "External id": 296651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539699.886, "dur": 0.954, + "args": { + "External id": 296652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539704.935, "dur": 4.207, + "args": { + "External id": 296653,"Record function id": 0, "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539706.240, "dur": 2.467, + "args": { + "External id": 296654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539706.704, "dur": 1.569, + "args": { + "External id": 296655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539707.125, "dur": 1.054, + "args": { + "External id": 296656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539712.300, "dur": 3.582, + "args": { + "External id": 296657,"Record function id": 0, "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539713.238, "dur": 2.218, + "args": { + "External id": 296658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539713.807, "dur": 1.260, + "args": { + "External id": 296659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539714.227, "dur": 0.774, + "args": { + "External id": 296660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539719.038, "dur": 4.445, + "args": { + "External id": 296661,"Record function id": 0, "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539720.319, "dur": 2.745, + "args": { + "External id": 296662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539720.753, "dur": 1.899, + "args": { + "External id": 296663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539721.835, "dur": 0.721, + "args": { + "External id": 296664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539726.586, "dur": 3.740, + "args": { + "External id": 296665,"Record function id": 0, "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539727.517, "dur": 2.384, + "args": { + "External id": 296666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539727.968, "dur": 1.531, + "args": { + "External id": 296667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539728.582, "dur": 0.830, + "args": { + "External id": 296668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539733.776, "dur": 3.722, + "args": { + "External id": 296669,"Record function id": 0, "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368539734.897, "dur": 2.158, + "args": { + "External id": 296670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539735.425, "dur": 1.240, + "args": { + "External id": 296671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368539735.772, "dur": 0.804, + "args": { + "External id": 296672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368539741.744, "dur": 36917.966, + "args": { + "External id": 296673,"Record function id": 0, "Sequence number": 1209181, "Fwd thread id": 1, "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368539742.947, "dur": 36875.324, + "args": { + "External id": 296674,"Sequence number": 1209181, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3297 + } + }, + { + "ph": "f", "id": 51, "pid": 2070554, "tid": 2107619, "ts": 5333368539742.947, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2070554, "tid": 2107619, + "ts": 5333368539772.527, "dur": 35.247, + "args": { + "External id": 296675,"Record function id": 0, "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2070554, "tid": 2107619, + "ts": 5333368539814.824, "dur": 63.476, + "args": { + "External id": 296676,"Record function id": 0, "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2070554, "tid": 2107619, + "ts": 5333368539883.757, "dur": 36727.467, + "args": { + "External id": 296677,"Record function id": 0, "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368539967.227, "dur": 6.898, + "args": { + "External id": 296678,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368539983.260, "dur": 4.491, + "args": { + "External id": 296679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368540001.429, "dur": 35790.991, + "args": { + "External id": 296680,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368540014.538, "dur": 35769.795, + "args": { + "External id": 296681,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368540054.303, "dur": 13.047, + "args": { + "External id": 296682,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368540073.537, "dur": 35672.263, + "args": { + "External id": 296683,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368540076.589, "dur": 35668.591, + "args": { + "External id": 296684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368540080.165, "dur": 4.968, + "args": { + "External id": 296685,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368540086.746, "dur": 35654.988, + "args": { + "External id": 296686,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368575881.340, "dur": 8.359, + "args": { + "External id": 296687,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368575883.618, "dur": 5.774, + "args": { + "External id": 296688,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368575916.677, "dur": 414.489, + "args": { + "External id": 296689,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368575943.969, "dur": 382.119, + "args": { + "External id": 296690,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3313, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368575955.115, "dur": 364.826, + "args": { + "External id": 296691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368576351.602, "dur": 5.376, + "args": { + "External id": 296692,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3315, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576418.174, "dur": 6.595, + "args": { + "External id": 296693,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576467.798, "dur": 1.584, + "args": { + "External id": 296694,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576484.864, "dur": 1.683, + "args": { + "External id": 296695,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576497.544, "dur": 1.113, + "args": { + "External id": 296696,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576508.915, "dur": 0.858, + "args": { + "External id": 296697,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576518.933, "dur": 0.760, + "args": { + "External id": 296698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576529.604, "dur": 0.762, + "args": { + "External id": 296699,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576540.286, "dur": 1.161, + "args": { + "External id": 296700,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576551.996, "dur": 0.673, + "args": { + "External id": 296701,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368576675.594, "dur": 2608.549, + "args": { + "External id": 296702,"Record function id": 0, "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2070554, "tid": 2107619, + "ts": 5333368576697.129, "dur": 919.482, + "args": { + "External id": 296703,"Record function id": 0, "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070554, "tid": 2107619, + "ts": 5333368576712.154, "dur": 294.518, + "args": { + "External id": 296704,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576786.804, "dur": 4.690, + "args": { + "External id": 296705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576794.313, "dur": 1.078, + "args": { + "External id": 296706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576797.213, "dur": 1.053, + "args": { + "External id": 296707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576800.277, "dur": 0.766, + "args": { + "External id": 296708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576802.839, "dur": 0.983, + "args": { + "External id": 296709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576805.210, "dur": 0.985, + "args": { + "External id": 296710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576807.902, "dur": 0.677, + "args": { + "External id": 296711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576810.121, "dur": 0.758, + "args": { + "External id": 296712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576812.324, "dur": 0.730, + "args": { + "External id": 296713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368576814.504, "dur": 0.653, + "args": { + "External id": 296714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368576833.403, "dur": 145.622, + "args": { + "External id": 296715,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368576848.560, "dur": 126.416, + "args": { + "External id": 296716,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368576861.811, "dur": 12.276, + "args": { + "External id": 296717,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368576877.606, "dur": 67.404, + "args": { + "External id": 296718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368576879.898, "dur": 64.837, + "args": { + "External id": 296719,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368576883.223, "dur": 6.966, + "args": { + "External id": 296720,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368576891.757, "dur": 52.291, + "args": { + "External id": 296721,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2070554, "tid": 2107619, + "ts": 5333368577078.694, "dur": 531.239, + "args": { + "External id": 296722,"Record function id": 0, "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070554, "tid": 2107619, + "ts": 5333368577093.700, "dur": 502.108, + "args": { + "External id": 296723,"Record function id": 0, "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368577144.567, "dur": 4.972, + "args": { + "External id": 296724,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368577164.279, "dur": 44.888, + "args": { + "External id": 296725,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577184.892, "dur": 2.769, + "args": { + "External id": 296726,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577189.646, "dur": 0.674, + "args": { + "External id": 296727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577191.738, "dur": 0.422, + "args": { + "External id": 296728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577194.373, "dur": 0.731, + "args": { + "External id": 296729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577196.504, "dur": 0.497, + "args": { + "External id": 296730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577198.084, "dur": 0.612, + "args": { + "External id": 296731,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577201.026, "dur": 0.573, + "args": { + "External id": 296732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577202.770, "dur": 0.710, + "args": { + "External id": 296733,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577204.376, "dur": 0.704, + "args": { + "External id": 296734,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368577219.243, "dur": 33.051, + "args": { + "External id": 296735,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368577283.581, "dur": 93.237, + "args": { + "External id": 296736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368577294.431, "dur": 3.935, + "args": { + "External id": 296737,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368577303.269, "dur": 9.402, + "args": { + "External id": 296738,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368577307.186, "dur": 5.032, + "args": { + "External id": 296739,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577310.255, "dur": 0.792, + "args": { + "External id": 296740,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368577319.912, "dur": 17.999, + "args": { + "External id": 296741,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577321.375, "dur": 0.538, + "args": { + "External id": 296742,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577322.993, "dur": 0.559, + "args": { + "External id": 296743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577324.785, "dur": 0.751, + "args": { + "External id": 296744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577326.374, "dur": 0.427, + "args": { + "External id": 296745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577327.865, "dur": 0.377, + "args": { + "External id": 296746,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577329.468, "dur": 0.605, + "args": { + "External id": 296747,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577331.064, "dur": 0.532, + "args": { + "External id": 296748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577332.760, "dur": 0.425, + "args": { + "External id": 296749,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368577334.198, "dur": 0.460, + "args": { + "External id": 296750,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368577346.791, "dur": 22.212, + "args": { + "External id": 296751,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368577418.590, "dur": 114.197, + "args": { + "External id": 296752,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368577444.251, "dur": 85.592, + "args": { + "External id": 296753,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3376, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368577453.307, "dur": 72.611, + "args": { + "External id": 296754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368577545.917, "dur": 1.726, + "args": { + "External id": 296755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3378, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368577659.853, "dur": 1605.263, + "args": { + "External id": 296756,"Sequence number": 1209180, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3379 + } + }, + { + "ph": "f", "id": 52, "pid": 2070554, "tid": 2107619, "ts": 5333368577659.853, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368577771.941, "dur": 109.819, + "args": { + "External id": 296757,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368577922.572, "dur": 36.954, + "args": { + "External id": 296758,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368577977.311, "dur": 52.961, + "args": { + "External id": 296759,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578039.714, "dur": 32.722, + "args": { + "External id": 296760,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578078.380, "dur": 48.825, + "args": { + "External id": 296761,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578133.998, "dur": 30.748, + "args": { + "External id": 296762,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578191.725, "dur": 47.175, + "args": { + "External id": 296763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368578263.218, "dur": 25.070, + "args": { + "External id": 296764,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368578307.067, "dur": 28.018, + "args": { + "External id": 296765,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368578354.050, "dur": 19.135, + "args": { + "External id": 296766,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368578386.272, "dur": 15.358, + "args": { + "External id": 296767,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578409.328, "dur": 29.700, + "args": { + "External id": 296768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578442.066, "dur": 33.376, + "args": { + "External id": 296769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368578503.092, "dur": 201.687, + "args": { + "External id": 296770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368578576.542, "dur": 5.756, + "args": { + "External id": 296771,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368578584.215, "dur": 2.690, + "args": { + "External id": 296772,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368578736.634, "dur": 26.211, + "args": { + "External id": 296773,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368578774.447, "dur": 19.848, + "args": { + "External id": 296774,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578803.020, "dur": 42.582, + "args": { + "External id": 296775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578851.599, "dur": 35.676, + "args": { + "External id": 296776,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578893.945, "dur": 21.858, + "args": { + "External id": 296777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578921.235, "dur": 29.790, + "args": { + "External id": 296778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578955.999, "dur": 21.931, + "args": { + "External id": 296779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368578984.611, "dur": 29.550, + "args": { + "External id": 296780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368579030.933, "dur": 21.272, + "args": { + "External id": 296781,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368579068.663, "dur": 39.638, + "args": { + "External id": 296782,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368579129.235, "dur": 18.132, + "args": { + "External id": 296783,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368579179.678, "dur": 21.825, + "args": { + "External id": 296784,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368579217.139, "dur": 15.921, + "args": { + "External id": 296785,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579306.027, "dur": 17.375, + "args": { + "External id": 296786,"Record function id": 0, "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579309.009, "dur": 13.401, + "args": { + "External id": 296787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579312.905, "dur": 8.642, + "args": { + "External id": 296788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579314.145, "dur": 7.288, + "args": { + "External id": 296789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579327.292, "dur": 3.759, + "args": { + "External id": 296790,"Record function id": 0, "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579328.280, "dur": 2.300, + "args": { + "External id": 296791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579328.882, "dur": 1.278, + "args": { + "External id": 296792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579329.272, "dur": 0.798, + "args": { + "External id": 296793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579334.326, "dur": 3.844, + "args": { + "External id": 296794,"Record function id": 0, "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579335.576, "dur": 2.160, + "args": { + "External id": 296795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579336.087, "dur": 1.253, + "args": { + "External id": 296796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579336.482, "dur": 0.765, + "args": { + "External id": 296797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579341.389, "dur": 3.456, + "args": { + "External id": 296798,"Record function id": 0, "Ev Idx": 3421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579342.432, "dur": 1.995, + "args": { + "External id": 296799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579342.952, "dur": 1.093, + "args": { + "External id": 296800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579343.243, "dur": 0.730, + "args": { + "External id": 296801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579348.019, "dur": 3.470, + "args": { + "External id": 296802,"Record function id": 0, "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579348.934, "dur": 2.124, + "args": { + "External id": 296803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579349.442, "dur": 1.241, + "args": { + "External id": 296804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579349.901, "dur": 0.711, + "args": { + "External id": 296805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579354.658, "dur": 3.365, + "args": { + "External id": 296806,"Record function id": 0, "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579355.593, "dur": 1.976, + "args": { + "External id": 296807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579356.034, "dur": 1.159, + "args": { + "External id": 296808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579356.412, "dur": 0.710, + "args": { + "External id": 296809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579361.201, "dur": 3.743, + "args": { + "External id": 296810,"Record function id": 0, "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579362.191, "dur": 2.329, + "args": { + "External id": 296811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579362.669, "dur": 1.455, + "args": { + "External id": 296812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579363.261, "dur": 0.797, + "args": { + "External id": 296813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579368.091, "dur": 3.702, + "args": { + "External id": 296814,"Record function id": 0, "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579369.055, "dur": 2.301, + "args": { + "External id": 296815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579369.532, "dur": 1.428, + "args": { + "External id": 296816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579370.122, "dur": 0.769, + "args": { + "External id": 296817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579378.410, "dur": 3.524, + "args": { + "External id": 296818,"Record function id": 0, "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368579379.378, "dur": 2.087, + "args": { + "External id": 296819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579379.835, "dur": 1.262, + "args": { + "External id": 296820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368579380.305, "dur": 0.719, + "args": { + "External id": 296821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368579385.790, "dur": 36668.013, + "args": { + "External id": 296822,"Record function id": 0, "Sequence number": 1209179, "Fwd thread id": 1, "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368579386.886, "dur": 36658.844, + "args": { + "External id": 296823,"Sequence number": 1209179, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3446 + } + }, + { + "ph": "f", "id": 53, "pid": 2070554, "tid": 2107619, "ts": 5333368579386.886, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2070554, "tid": 2107619, + "ts": 5333368579414.907, "dur": 37.665, + "args": { + "External id": 296824,"Record function id": 0, "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2070554, "tid": 2107619, + "ts": 5333368579460.235, "dur": 61.874, + "args": { + "External id": 296825,"Record function id": 0, "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2070554, "tid": 2107619, + "ts": 5333368579528.283, "dur": 36509.413, + "args": { + "External id": 296826,"Record function id": 0, "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368579613.166, "dur": 40.623, + "args": { + "External id": 296827,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368579666.135, "dur": 5.509, + "args": { + "External id": 296828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368579685.953, "dur": 35555.117, + "args": { + "External id": 296829,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368579698.632, "dur": 35534.685, + "args": { + "External id": 296830,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368579744.665, "dur": 13.647, + "args": { + "External id": 296831,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368579764.445, "dur": 35428.195, + "args": { + "External id": 296832,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368579771.785, "dur": 35420.284, + "args": { + "External id": 296833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368579774.965, "dur": 5.018, + "args": { + "External id": 296834,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368579781.732, "dur": 35407.166, + "args": { + "External id": 296835,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368615325.497, "dur": 8.117, + "args": { + "External id": 296836,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368615327.775, "dur": 5.576, + "args": { + "External id": 296837,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368615360.327, "dur": 404.394, + "args": { + "External id": 296838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368615384.137, "dur": 375.375, + "args": { + "External id": 296839,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3462, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368615396.032, "dur": 357.437, + "args": { + "External id": 296840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368615787.162, "dur": 2.505, + "args": { + "External id": 296841,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3464, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615848.929, "dur": 6.514, + "args": { + "External id": 296842,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615898.762, "dur": 1.406, + "args": { + "External id": 296843,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615914.006, "dur": 1.403, + "args": { + "External id": 296844,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615926.138, "dur": 0.917, + "args": { + "External id": 296845,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615937.502, "dur": 0.766, + "args": { + "External id": 296846,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615947.447, "dur": 1.050, + "args": { + "External id": 296847,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615958.245, "dur": 0.780, + "args": { + "External id": 296848,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615969.134, "dur": 0.732, + "args": { + "External id": 296849,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368615978.705, "dur": 0.764, + "args": { + "External id": 296850,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368616067.033, "dur": 2687.122, + "args": { + "External id": 296851,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2070554, "tid": 2107619, + "ts": 5333368616087.192, "dur": 990.104, + "args": { + "External id": 296852,"Record function id": 0, "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070554, "tid": 2107619, + "ts": 5333368616102.038, "dur": 316.589, + "args": { + "External id": 296853,"Record function id": 0, "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616194.195, "dur": 4.764, + "args": { + "External id": 296854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616202.174, "dur": 4.415, + "args": { + "External id": 296855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616208.440, "dur": 0.788, + "args": { + "External id": 296856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616210.991, "dur": 0.787, + "args": { + "External id": 296857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616213.765, "dur": 0.836, + "args": { + "External id": 296858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616216.202, "dur": 0.729, + "args": { + "External id": 296859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616218.528, "dur": 0.668, + "args": { + "External id": 296860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616220.718, "dur": 0.827, + "args": { + "External id": 296861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616223.048, "dur": 0.699, + "args": { + "External id": 296862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368616225.032, "dur": 0.863, + "args": { + "External id": 296863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368616245.265, "dur": 145.042, + "args": { + "External id": 296864,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368616261.035, "dur": 125.407, + "args": { + "External id": 296865,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368616273.833, "dur": 12.501, + "args": { + "External id": 296866,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368616289.820, "dur": 66.870, + "args": { + "External id": 296867,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368616292.161, "dur": 64.265, + "args": { + "External id": 296868,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616295.737, "dur": 6.301, + "args": { + "External id": 296869,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368616303.634, "dur": 52.076, + "args": { + "External id": 296870,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2070554, "tid": 2107619, + "ts": 5333368616495.992, "dur": 574.209, + "args": { + "External id": 296871,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070554, "tid": 2107619, + "ts": 5333368616512.674, "dur": 546.308, + "args": { + "External id": 296872,"Record function id": 0, "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368616565.707, "dur": 5.011, + "args": { + "External id": 296873,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368616585.591, "dur": 28.911, + "args": { + "External id": 296874,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616589.982, "dur": 2.991, + "args": { + "External id": 296875,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616594.572, "dur": 0.552, + "args": { + "External id": 296876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616596.700, "dur": 0.549, + "args": { + "External id": 296877,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616599.572, "dur": 0.590, + "args": { + "External id": 296878,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616601.680, "dur": 0.373, + "args": { + "External id": 296879,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616603.074, "dur": 0.501, + "args": { + "External id": 296880,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616605.954, "dur": 0.877, + "args": { + "External id": 296881,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616607.755, "dur": 0.531, + "args": { + "External id": 296882,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616609.180, "dur": 1.603, + "args": { + "External id": 296883,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368616662.236, "dur": 35.801, + "args": { + "External id": 296884,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368616736.925, "dur": 104.305, + "args": { + "External id": 296885,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368616747.867, "dur": 5.802, + "args": { + "External id": 296886,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368616758.466, "dur": 10.716, + "args": { + "External id": 296887,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368616762.501, "dur": 6.254, + "args": { + "External id": 296888,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616766.175, "dur": 0.991, + "args": { + "External id": 296889,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368616776.736, "dur": 26.366, + "args": { + "External id": 296890,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616778.475, "dur": 0.998, + "args": { + "External id": 296891,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616780.708, "dur": 1.630, + "args": { + "External id": 296892,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616783.668, "dur": 0.518, + "args": { + "External id": 296893,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616785.367, "dur": 0.838, + "args": { + "External id": 296894,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616787.964, "dur": 0.699, + "args": { + "External id": 296895,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616793.812, "dur": 0.427, + "args": { + "External id": 296896,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616795.354, "dur": 0.498, + "args": { + "External id": 296897,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616797.491, "dur": 0.578, + "args": { + "External id": 296898,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368616799.177, "dur": 0.493, + "args": { + "External id": 296899,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368616813.134, "dur": 20.063, + "args": { + "External id": 296900,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368616887.731, "dur": 110.233, + "args": { + "External id": 296901,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368616910.119, "dur": 84.601, + "args": { + "External id": 296902,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3525, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368616918.936, "dur": 71.686, + "args": { + "External id": 296903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368617010.734, "dur": 1.828, + "args": { + "External id": 296904,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3527, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368617087.707, "dur": 1647.118, + "args": { + "External id": 296905,"Sequence number": 1209178, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3528 + } + }, + { + "ph": "f", "id": 54, "pid": 2070554, "tid": 2107619, "ts": 5333368617087.707, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617216.393, "dur": 114.532, + "args": { + "External id": 296906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368617370.531, "dur": 42.958, + "args": { + "External id": 296907,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617429.706, "dur": 52.834, + "args": { + "External id": 296908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617491.934, "dur": 36.142, + "args": { + "External id": 296909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617534.030, "dur": 45.190, + "args": { + "External id": 296910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617585.913, "dur": 27.665, + "args": { + "External id": 296911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617660.433, "dur": 49.461, + "args": { + "External id": 296912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368617735.305, "dur": 24.847, + "args": { + "External id": 296913,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368617780.140, "dur": 28.373, + "args": { + "External id": 296914,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368617827.426, "dur": 19.531, + "args": { + "External id": 296915,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368617858.567, "dur": 16.778, + "args": { + "External id": 296916,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617883.415, "dur": 30.752, + "args": { + "External id": 296917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368617917.466, "dur": 33.192, + "args": { + "External id": 296918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368617979.840, "dur": 163.965, + "args": { + "External id": 296919,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368618052.810, "dur": 5.436, + "args": { + "External id": 296920,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368618060.075, "dur": 2.876, + "args": { + "External id": 296921,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368618189.614, "dur": 29.158, + "args": { + "External id": 296922,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368618231.153, "dur": 14.478, + "args": { + "External id": 296923,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368618255.157, "dur": 44.097, + "args": { + "External id": 296924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368618305.828, "dur": 39.786, + "args": { + "External id": 296925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368618352.179, "dur": 22.876, + "args": { + "External id": 296926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368618379.593, "dur": 32.576, + "args": { + "External id": 296927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368618417.822, "dur": 21.696, + "args": { + "External id": 296928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368618445.793, "dur": 29.323, + "args": { + "External id": 296929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368618505.095, "dur": 28.595, + "args": { + "External id": 296930,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368618549.857, "dur": 23.337, + "args": { + "External id": 296931,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368618586.663, "dur": 19.525, + "args": { + "External id": 296932,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368618655.005, "dur": 18.963, + "args": { + "External id": 296933,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368618690.276, "dur": 16.358, + "args": { + "External id": 296934,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618775.820, "dur": 14.112, + "args": { + "External id": 296935,"Record function id": 0, "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618778.817, "dur": 10.179, + "args": { + "External id": 296936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618782.877, "dur": 5.240, + "args": { + "External id": 296937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618784.119, "dur": 3.911, + "args": { + "External id": 296938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618793.866, "dur": 4.378, + "args": { + "External id": 296939,"Record function id": 0, "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618795.195, "dur": 2.540, + "args": { + "External id": 296940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618795.888, "dur": 1.422, + "args": { + "External id": 296941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618796.201, "dur": 1.028, + "args": { + "External id": 296942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618801.431, "dur": 3.866, + "args": { + "External id": 296943,"Record function id": 0, "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618802.430, "dur": 2.430, + "args": { + "External id": 296944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618803.017, "dur": 1.403, + "args": { + "External id": 296945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618803.670, "dur": 0.678, + "args": { + "External id": 296946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618808.440, "dur": 3.668, + "args": { + "External id": 296947,"Record function id": 0, "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618809.457, "dur": 2.169, + "args": { + "External id": 296948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618809.996, "dur": 1.225, + "args": { + "External id": 296949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618810.448, "dur": 0.710, + "args": { + "External id": 296950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618815.153, "dur": 3.807, + "args": { + "External id": 296951,"Record function id": 0, "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618816.114, "dur": 2.367, + "args": { + "External id": 296952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618816.599, "dur": 1.487, + "args": { + "External id": 296953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618817.017, "dur": 0.947, + "args": { + "External id": 296954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618822.074, "dur": 3.694, + "args": { + "External id": 296955,"Record function id": 0, "Ev Idx": 3578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618823.207, "dur": 2.119, + "args": { + "External id": 296956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618823.805, "dur": 1.118, + "args": { + "External id": 296957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618824.119, "dur": 0.732, + "args": { + "External id": 296958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618828.963, "dur": 17.558, + "args": { + "External id": 296959,"Record function id": 0, "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618830.054, "dur": 15.998, + "args": { + "External id": 296960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618844.149, "dur": 1.472, + "args": { + "External id": 296961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618844.516, "dur": 1.033, + "args": { + "External id": 296962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618849.712, "dur": 4.179, + "args": { + "External id": 296963,"Record function id": 0, "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618851.154, "dur": 2.277, + "args": { + "External id": 296964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618851.668, "dur": 1.359, + "args": { + "External id": 296965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618852.031, "dur": 0.919, + "args": { + "External id": 296966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618857.205, "dur": 3.403, + "args": { + "External id": 296967,"Record function id": 0, "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368618858.212, "dur": 1.962, + "args": { + "External id": 296968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618858.694, "dur": 1.068, + "args": { + "External id": 296969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368618858.984, "dur": 0.701, + "args": { + "External id": 296970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368618864.326, "dur": 36889.316, + "args": { + "External id": 296971,"Record function id": 0, "Sequence number": 1209177, "Fwd thread id": 1, "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368618865.432, "dur": 36879.424, + "args": { + "External id": 296972,"Sequence number": 1209177, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3595 + } + }, + { + "ph": "f", "id": 55, "pid": 2070554, "tid": 2107619, "ts": 5333368618865.432, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2070554, "tid": 2107619, + "ts": 5333368618895.754, "dur": 42.281, + "args": { + "External id": 296973,"Record function id": 0, "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2070554, "tid": 2107619, + "ts": 5333368618945.787, "dur": 69.891, + "args": { + "External id": 296974,"Record function id": 0, "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2070554, "tid": 2107619, + "ts": 5333368619021.437, "dur": 36715.319, + "args": { + "External id": 296975,"Record function id": 0, "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368619114.003, "dur": 7.255, + "args": { + "External id": 296976,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368619130.650, "dur": 4.882, + "args": { + "External id": 296977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368619148.870, "dur": 35765.274, + "args": { + "External id": 296978,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368619161.258, "dur": 35744.554, + "args": { + "External id": 296979,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368619228.547, "dur": 14.415, + "args": { + "External id": 296980,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368619249.153, "dur": 35618.772, + "args": { + "External id": 296981,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368619251.747, "dur": 35615.611, + "args": { + "External id": 296982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368619255.312, "dur": 6.188, + "args": { + "External id": 296983,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368619263.149, "dur": 35601.006, + "args": { + "External id": 296984,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368654996.947, "dur": 8.832, + "args": { + "External id": 296985,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368654999.324, "dur": 6.102, + "args": { + "External id": 296986,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368655034.889, "dur": 375.490, + "args": { + "External id": 296987,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368655058.706, "dur": 346.894, + "args": { + "External id": 296988,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3611, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368655070.685, "dur": 329.157, + "args": { + "External id": 296989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368655428.718, "dur": 2.432, + "args": { + "External id": 296990,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3613, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655488.852, "dur": 6.534, + "args": { + "External id": 296991,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655537.062, "dur": 1.145, + "args": { + "External id": 296992,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655552.288, "dur": 1.307, + "args": { + "External id": 296993,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655565.291, "dur": 1.014, + "args": { + "External id": 296994,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655579.730, "dur": 0.927, + "args": { + "External id": 296995,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655591.316, "dur": 0.984, + "args": { + "External id": 296996,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655601.714, "dur": 0.805, + "args": { + "External id": 296997,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655612.334, "dur": 0.739, + "args": { + "External id": 296998,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655671.738, "dur": 1.809, + "args": { + "External id": 296999,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368655768.662, "dur": 2630.131, + "args": { + "External id": 297000,"Record function id": 0, "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2070554, "tid": 2107619, + "ts": 5333368655788.435, "dur": 982.048, + "args": { + "External id": 297001,"Record function id": 0, "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070554, "tid": 2107619, + "ts": 5333368655801.484, "dur": 297.949, + "args": { + "External id": 297002,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655875.730, "dur": 4.191, + "args": { + "External id": 297003,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655882.873, "dur": 0.769, + "args": { + "External id": 297004,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655885.155, "dur": 0.967, + "args": { + "External id": 297005,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655888.295, "dur": 1.060, + "args": { + "External id": 297006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655890.924, "dur": 1.144, + "args": { + "External id": 297007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655893.334, "dur": 0.842, + "args": { + "External id": 297008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655896.041, "dur": 0.625, + "args": { + "External id": 297009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655897.956, "dur": 0.877, + "args": { + "External id": 297010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655900.023, "dur": 0.884, + "args": { + "External id": 297011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368655902.316, "dur": 1.100, + "args": { + "External id": 297012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368655921.593, "dur": 148.328, + "args": { + "External id": 297013,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368655939.230, "dur": 126.609, + "args": { + "External id": 297014,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368655952.314, "dur": 12.089, + "args": { + "External id": 297015,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368655967.655, "dur": 69.118, + "args": { + "External id": 297016,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368655970.285, "dur": 66.150, + "args": { + "External id": 297017,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368655973.970, "dur": 6.332, + "args": { + "External id": 297018,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368655981.770, "dur": 54.010, + "args": { + "External id": 297019,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2070554, "tid": 2107619, + "ts": 5333368656194.260, "dur": 568.414, + "args": { + "External id": 297020,"Record function id": 0, "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070554, "tid": 2107619, + "ts": 5333368656211.495, "dur": 538.760, + "args": { + "External id": 297021,"Record function id": 0, "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368656270.896, "dur": 6.551, + "args": { + "External id": 297022,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368656292.955, "dur": 27.091, + "args": { + "External id": 297023,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656297.244, "dur": 1.971, + "args": { + "External id": 297024,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656300.915, "dur": 0.595, + "args": { + "External id": 297025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656303.028, "dur": 1.016, + "args": { + "External id": 297026,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656305.665, "dur": 0.676, + "args": { + "External id": 297027,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656307.370, "dur": 0.639, + "args": { + "External id": 297028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656309.520, "dur": 0.731, + "args": { + "External id": 297029,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656312.108, "dur": 0.823, + "args": { + "External id": 297030,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656314.113, "dur": 0.695, + "args": { + "External id": 297031,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656315.975, "dur": 0.745, + "args": { + "External id": 297032,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368656330.050, "dur": 31.377, + "args": { + "External id": 297033,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368656391.455, "dur": 93.562, + "args": { + "External id": 297034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368656401.292, "dur": 3.068, + "args": { + "External id": 297035,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368656409.234, "dur": 10.078, + "args": { + "External id": 297036,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368656413.422, "dur": 5.448, + "args": { + "External id": 297037,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656416.670, "dur": 0.958, + "args": { + "External id": 297038,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368656426.110, "dur": 21.373, + "args": { + "External id": 297039,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656428.266, "dur": 0.914, + "args": { + "External id": 297040,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656430.412, "dur": 0.625, + "args": { + "External id": 297041,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656432.274, "dur": 0.808, + "args": { + "External id": 297042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656434.532, "dur": 0.757, + "args": { + "External id": 297043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656436.472, "dur": 0.476, + "args": { + "External id": 297044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656438.307, "dur": 0.716, + "args": { + "External id": 297045,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656440.178, "dur": 0.856, + "args": { + "External id": 297046,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656442.178, "dur": 0.780, + "args": { + "External id": 297047,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368656444.163, "dur": 1.080, + "args": { + "External id": 297048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368656456.687, "dur": 20.834, + "args": { + "External id": 297049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368656526.224, "dur": 152.077, + "args": { + "External id": 297050,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368656545.505, "dur": 128.977, + "args": { + "External id": 297051,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368656555.097, "dur": 114.426, + "args": { + "External id": 297052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368656692.407, "dur": 2.235, + "args": { + "External id": 297053,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368656777.289, "dur": 1603.359, + "args": { + "External id": 297054,"Sequence number": 1209176, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3677 + } + }, + { + "ph": "f", "id": 56, "pid": 2070554, "tid": 2107619, "ts": 5333368656777.289, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368656883.709, "dur": 109.079, + "args": { + "External id": 297055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368657030.289, "dur": 40.232, + "args": { + "External id": 297056,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657085.602, "dur": 51.860, + "args": { + "External id": 297057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657146.285, "dur": 54.823, + "args": { + "External id": 297058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657210.265, "dur": 47.500, + "args": { + "External id": 297059,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657264.078, "dur": 27.791, + "args": { + "External id": 297060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657298.241, "dur": 42.188, + "args": { + "External id": 297061,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368657362.004, "dur": 23.615, + "args": { + "External id": 297062,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368657402.282, "dur": 27.622, + "args": { + "External id": 297063,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368657449.466, "dur": 18.918, + "args": { + "External id": 297064,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368657479.176, "dur": 15.385, + "args": { + "External id": 297065,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657501.818, "dur": 30.434, + "args": { + "External id": 297066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657535.456, "dur": 35.377, + "args": { + "External id": 297067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368657595.842, "dur": 233.573, + "args": { + "External id": 297068,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368657731.005, "dur": 6.499, + "args": { + "External id": 297069,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368657739.298, "dur": 3.195, + "args": { + "External id": 297070,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368657859.226, "dur": 29.715, + "args": { + "External id": 297071,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368657899.656, "dur": 13.677, + "args": { + "External id": 297072,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657921.090, "dur": 45.553, + "args": { + "External id": 297073,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368657972.250, "dur": 36.611, + "args": { + "External id": 297074,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368658015.098, "dur": 21.882, + "args": { + "External id": 297075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368658041.485, "dur": 29.104, + "args": { + "External id": 297076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368658078.709, "dur": 21.897, + "args": { + "External id": 297077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368658107.227, "dur": 41.423, + "args": { + "External id": 297078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368658187.222, "dur": 26.749, + "args": { + "External id": 297079,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368658231.180, "dur": 23.566, + "args": { + "External id": 297080,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368658271.931, "dur": 16.825, + "args": { + "External id": 297081,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368658304.276, "dur": 18.560, + "args": { + "External id": 297082,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368658333.821, "dur": 15.898, + "args": { + "External id": 297083,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658420.147, "dur": 18.033, + "args": { + "External id": 297084,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658423.329, "dur": 13.774, + "args": { + "External id": 297085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658427.214, "dur": 8.945, + "args": { + "External id": 297086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658428.486, "dur": 7.553, + "args": { + "External id": 297087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658442.053, "dur": 4.138, + "args": { + "External id": 297088,"Record function id": 0, "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658443.273, "dur": 2.455, + "args": { + "External id": 297089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658443.879, "dur": 1.408, + "args": { + "External id": 297090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658444.193, "dur": 1.000, + "args": { + "External id": 297091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658449.440, "dur": 3.929, + "args": { + "External id": 297092,"Record function id": 0, "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658450.493, "dur": 2.449, + "args": { + "External id": 297093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658450.995, "dur": 1.480, + "args": { + "External id": 297094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658451.306, "dur": 1.104, + "args": { + "External id": 297095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658456.564, "dur": 3.767, + "args": { + "External id": 297096,"Record function id": 0, "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658457.672, "dur": 2.264, + "args": { + "External id": 297097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658458.190, "dur": 1.353, + "args": { + "External id": 297098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658458.672, "dur": 0.796, + "args": { + "External id": 297099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658463.464, "dur": 4.048, + "args": { + "External id": 297100,"Record function id": 0, "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658464.460, "dur": 2.631, + "args": { + "External id": 297101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658465.139, "dur": 1.566, + "args": { + "External id": 297102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658465.446, "dur": 1.172, + "args": { + "External id": 297103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658470.718, "dur": 3.750, + "args": { + "External id": 297104,"Record function id": 0, "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658471.978, "dur": 2.077, + "args": { + "External id": 297105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658472.455, "dur": 1.210, + "args": { + "External id": 297106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658472.820, "dur": 0.765, + "args": { + "External id": 297107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658477.690, "dur": 3.984, + "args": { + "External id": 297108,"Record function id": 0, "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658479.086, "dur": 2.169, + "args": { + "External id": 297109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658479.586, "dur": 1.284, + "args": { + "External id": 297110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658479.892, "dur": 0.906, + "args": { + "External id": 297111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658484.987, "dur": 3.519, + "args": { + "External id": 297112,"Record function id": 0, "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658486.043, "dur": 2.045, + "args": { + "External id": 297113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658486.536, "dur": 1.155, + "args": { + "External id": 297114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658486.803, "dur": 0.825, + "args": { + "External id": 297115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658491.879, "dur": 3.450, + "args": { + "External id": 297116,"Record function id": 0, "Ev Idx": 3739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368658492.881, "dur": 2.029, + "args": { + "External id": 297117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658493.408, "dur": 1.116, + "args": { + "External id": 297118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368658493.762, "dur": 0.699, + "args": { + "External id": 297119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368658499.909, "dur": 39158.309, + "args": { + "External id": 297120,"Record function id": 0, "Sequence number": 1209175, "Fwd thread id": 1, "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368658501.252, "dur": 39115.817, + "args": { + "External id": 297121,"Sequence number": 1209175, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3744 + } + }, + { + "ph": "f", "id": 57, "pid": 2070554, "tid": 2107619, "ts": 5333368658501.252, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2070554, "tid": 2107619, + "ts": 5333368658528.751, "dur": 37.138, + "args": { + "External id": 297122,"Record function id": 0, "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2070554, "tid": 2107619, + "ts": 5333368658572.842, "dur": 98.930, + "args": { + "External id": 297123,"Record function id": 0, "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2070554, "tid": 2107619, + "ts": 5333368658679.948, "dur": 38929.869, + "args": { + "External id": 297124,"Record function id": 0, "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368658768.296, "dur": 7.107, + "args": { + "External id": 297125,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368658785.307, "dur": 5.336, + "args": { + "External id": 297126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368658807.283, "dur": 37988.122, + "args": { + "External id": 297127,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368658821.215, "dur": 37966.063, + "args": { + "External id": 297128,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368658864.371, "dur": 13.851, + "args": { + "External id": 297129,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368658884.129, "dur": 37866.382, + "args": { + "External id": 297130,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368658886.573, "dur": 37863.332, + "args": { + "External id": 297131,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368658890.288, "dur": 5.169, + "args": { + "External id": 297132,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368658897.237, "dur": 37849.391, + "args": { + "External id": 297133,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368696876.945, "dur": 8.928, + "args": { + "External id": 297134,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368696879.544, "dur": 5.966, + "args": { + "External id": 297135,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368696914.085, "dur": 414.611, + "args": { + "External id": 297136,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368696937.813, "dur": 385.763, + "args": { + "External id": 297137,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3760, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368696948.288, "dur": 369.261, + "args": { + "External id": 297138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368697349.356, "dur": 2.283, + "args": { + "External id": 297139,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3762, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697413.662, "dur": 6.635, + "args": { + "External id": 297140,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697464.439, "dur": 1.639, + "args": { + "External id": 297141,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697480.036, "dur": 1.049, + "args": { + "External id": 297142,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697492.088, "dur": 1.059, + "args": { + "External id": 297143,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697503.328, "dur": 0.920, + "args": { + "External id": 297144,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697513.780, "dur": 0.822, + "args": { + "External id": 297145,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697523.575, "dur": 0.791, + "args": { + "External id": 297146,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697537.004, "dur": 0.993, + "args": { + "External id": 297147,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697547.233, "dur": 1.127, + "args": { + "External id": 297148,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368697673.992, "dur": 2634.160, + "args": { + "External id": 297149,"Record function id": 0, "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2070554, "tid": 2107619, + "ts": 5333368697695.850, "dur": 982.230, + "args": { + "External id": 297150,"Record function id": 0, "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070554, "tid": 2107619, + "ts": 5333368697712.056, "dur": 302.058, + "args": { + "External id": 297151,"Record function id": 0, "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697789.771, "dur": 4.502, + "args": { + "External id": 297152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697797.388, "dur": 0.946, + "args": { + "External id": 297153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697799.965, "dur": 0.665, + "args": { + "External id": 297154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697802.363, "dur": 1.010, + "args": { + "External id": 297155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697804.748, "dur": 0.951, + "args": { + "External id": 297156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697807.528, "dur": 0.926, + "args": { + "External id": 297157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697809.832, "dur": 0.843, + "args": { + "External id": 297158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697811.923, "dur": 0.740, + "args": { + "External id": 297159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697814.268, "dur": 0.994, + "args": { + "External id": 297160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368697816.529, "dur": 0.896, + "args": { + "External id": 297161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368697835.314, "dur": 151.010, + "args": { + "External id": 297162,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368697854.828, "dur": 127.387, + "args": { + "External id": 297163,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368697867.366, "dur": 12.363, + "args": { + "External id": 297164,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368697883.344, "dur": 67.695, + "args": { + "External id": 297165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368697885.591, "dur": 65.110, + "args": { + "External id": 297166,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368697889.460, "dur": 5.491, + "args": { + "External id": 297167,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368697896.641, "dur": 53.315, + "args": { + "External id": 297168,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2070554, "tid": 2107619, + "ts": 5333368698091.135, "dur": 578.145, + "args": { + "External id": 297169,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070554, "tid": 2107619, + "ts": 5333368698106.332, "dur": 509.282, + "args": { + "External id": 297170,"Record function id": 0, "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368698158.033, "dur": 5.057, + "args": { + "External id": 297171,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368698195.319, "dur": 26.370, + "args": { + "External id": 297172,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698200.200, "dur": 1.725, + "args": { + "External id": 297173,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698204.250, "dur": 0.743, + "args": { + "External id": 297174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698206.349, "dur": 0.611, + "args": { + "External id": 297175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698208.171, "dur": 0.685, + "args": { + "External id": 297176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698210.346, "dur": 0.938, + "args": { + "External id": 297177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698212.465, "dur": 0.628, + "args": { + "External id": 297178,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698214.517, "dur": 0.592, + "args": { + "External id": 297179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698216.294, "dur": 0.802, + "args": { + "External id": 297180,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698218.311, "dur": 0.626, + "args": { + "External id": 297181,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368698231.344, "dur": 37.342, + "args": { + "External id": 297182,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368698299.487, "dur": 103.499, + "args": { + "External id": 297183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368698310.083, "dur": 3.753, + "args": { + "External id": 297184,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368698321.826, "dur": 9.626, + "args": { + "External id": 297185,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368698325.740, "dur": 5.271, + "args": { + "External id": 297186,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698328.867, "dur": 1.018, + "args": { + "External id": 297187,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368698338.508, "dur": 25.785, + "args": { + "External id": 297188,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698340.508, "dur": 1.208, + "args": { + "External id": 297189,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698342.921, "dur": 0.740, + "args": { + "External id": 297190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698347.321, "dur": 0.832, + "args": { + "External id": 297191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698349.023, "dur": 0.631, + "args": { + "External id": 297192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698350.766, "dur": 0.777, + "args": { + "External id": 297193,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698352.899, "dur": 0.775, + "args": { + "External id": 297194,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698355.365, "dur": 0.482, + "args": { + "External id": 297195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698357.160, "dur": 0.760, + "args": { + "External id": 297196,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368698359.020, "dur": 0.796, + "args": { + "External id": 297197,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368698373.466, "dur": 22.217, + "args": { + "External id": 297198,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368698449.147, "dur": 105.751, + "args": { + "External id": 297199,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368698468.593, "dur": 83.212, + "args": { + "External id": 297200,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3823, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368698478.299, "dur": 69.660, + "args": { + "External id": 297201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368698568.708, "dur": 1.884, + "args": { + "External id": 297202,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3825, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368698686.307, "dur": 1601.712, + "args": { + "External id": 297203,"Sequence number": 1209174, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3826 + } + }, + { + "ph": "f", "id": 58, "pid": 2070554, "tid": 2107619, "ts": 5333368698686.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368698792.220, "dur": 111.666, + "args": { + "External id": 297204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368698941.719, "dur": 36.886, + "args": { + "External id": 297205,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368698994.881, "dur": 49.470, + "args": { + "External id": 297206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699053.526, "dur": 33.473, + "args": { + "External id": 297207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699093.194, "dur": 46.151, + "args": { + "External id": 297208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699146.407, "dur": 46.668, + "args": { + "External id": 297209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699203.620, "dur": 48.394, + "args": { + "External id": 297210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368699275.332, "dur": 22.961, + "args": { + "External id": 297211,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368699316.546, "dur": 27.244, + "args": { + "External id": 297212,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368699362.147, "dur": 18.743, + "args": { + "External id": 297213,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368699393.304, "dur": 15.850, + "args": { + "External id": 297214,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699417.458, "dur": 31.275, + "args": { + "External id": 297215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699451.801, "dur": 36.010, + "args": { + "External id": 297216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368699516.513, "dur": 208.720, + "args": { + "External id": 297217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368699590.202, "dur": 6.149, + "args": { + "External id": 297218,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368699598.208, "dur": 2.949, + "args": { + "External id": 297219,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368699757.298, "dur": 26.513, + "args": { + "External id": 297220,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368699794.926, "dur": 14.312, + "args": { + "External id": 297221,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699818.010, "dur": 42.841, + "args": { + "External id": 297222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699866.868, "dur": 36.288, + "args": { + "External id": 297223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699909.932, "dur": 22.372, + "args": { + "External id": 297224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699936.534, "dur": 38.659, + "args": { + "External id": 297225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368699980.849, "dur": 20.927, + "args": { + "External id": 297226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368700007.249, "dur": 29.289, + "args": { + "External id": 297227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368700056.475, "dur": 35.745, + "args": { + "External id": 297228,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368700119.442, "dur": 24.980, + "args": { + "External id": 297229,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368700157.879, "dur": 33.228, + "args": { + "External id": 297230,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368700213.567, "dur": 15.801, + "args": { + "External id": 297231,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368700241.052, "dur": 15.174, + "args": { + "External id": 297232,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700329.936, "dur": 14.925, + "args": { + "External id": 297233,"Record function id": 0, "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700332.975, "dur": 10.921, + "args": { + "External id": 297234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700337.142, "dur": 5.971, + "args": { + "External id": 297235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700338.749, "dur": 4.275, + "args": { + "External id": 297236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700349.053, "dur": 4.690, + "args": { + "External id": 297237,"Record function id": 0, "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700350.355, "dur": 2.900, + "args": { + "External id": 297238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700351.161, "dur": 1.691, + "args": { + "External id": 297239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700351.716, "dur": 1.026, + "args": { + "External id": 297240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700357.155, "dur": 3.641, + "args": { + "External id": 297241,"Record function id": 0, "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700358.006, "dur": 2.336, + "args": { + "External id": 297242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700358.655, "dur": 1.305, + "args": { + "External id": 297243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700359.145, "dur": 0.726, + "args": { + "External id": 297244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700363.966, "dur": 7.505, + "args": { + "External id": 297245,"Record function id": 0, "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700364.946, "dur": 6.054, + "args": { + "External id": 297246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700365.494, "dur": 5.117, + "args": { + "External id": 297247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700369.627, "dur": 0.877, + "args": { + "External id": 297248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700374.606, "dur": 3.916, + "args": { + "External id": 297249,"Record function id": 0, "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700375.667, "dur": 2.398, + "args": { + "External id": 297250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700376.169, "dur": 1.523, + "args": { + "External id": 297251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700376.809, "dur": 0.786, + "args": { + "External id": 297252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700381.836, "dur": 4.015, + "args": { + "External id": 297253,"Record function id": 0, "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700382.734, "dur": 2.685, + "args": { + "External id": 297254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700383.489, "dur": 1.541, + "args": { + "External id": 297255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700384.033, "dur": 0.898, + "args": { + "External id": 297256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700389.063, "dur": 3.623, + "args": { + "External id": 297257,"Record function id": 0, "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700390.261, "dur": 2.000, + "args": { + "External id": 297258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700390.800, "dur": 1.079, + "args": { + "External id": 297259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700391.091, "dur": 0.686, + "args": { + "External id": 297260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700395.910, "dur": 3.520, + "args": { + "External id": 297261,"Record function id": 0, "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700396.803, "dur": 2.147, + "args": { + "External id": 297262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700397.292, "dur": 1.273, + "args": { + "External id": 297263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700397.581, "dur": 0.889, + "args": { + "External id": 297264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700402.580, "dur": 3.753, + "args": { + "External id": 297265,"Record function id": 0, "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368700403.639, "dur": 2.238, + "args": { + "External id": 297266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700404.136, "dur": 1.352, + "args": { + "External id": 297267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368700404.595, "dur": 0.790, + "args": { + "External id": 297268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368700410.111, "dur": 36642.416, + "args": { + "External id": 297269,"Record function id": 0, "Sequence number": 1209173, "Fwd thread id": 1, "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368700411.221, "dur": 36632.815, + "args": { + "External id": 297270,"Sequence number": 1209173, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3893 + } + }, + { + "ph": "f", "id": 59, "pid": 2070554, "tid": 2107619, "ts": 5333368700411.221, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2070554, "tid": 2107619, + "ts": 5333368700439.792, "dur": 37.217, + "args": { + "External id": 297271,"Record function id": 0, "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2070554, "tid": 2107619, + "ts": 5333368700484.800, "dur": 62.020, + "args": { + "External id": 297272,"Record function id": 0, "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2070554, "tid": 2107619, + "ts": 5333368700552.307, "dur": 36481.291, + "args": { + "External id": 297273,"Record function id": 0, "Ev Idx": 3896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368700674.414, "dur": 7.734, + "args": { + "External id": 297274,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368700692.639, "dur": 5.202, + "args": { + "External id": 297275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368700712.087, "dur": 35522.047, + "args": { + "External id": 297276,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368700725.218, "dur": 35501.014, + "args": { + "External id": 297277,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368700756.707, "dur": 13.842, + "args": { + "External id": 297278,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368700776.305, "dur": 35413.257, + "args": { + "External id": 297279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368700778.951, "dur": 35410.057, + "args": { + "External id": 297280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368700782.445, "dur": 5.211, + "args": { + "External id": 297281,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368700789.259, "dur": 35396.373, + "args": { + "External id": 297282,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368736315.759, "dur": 9.634, + "args": { + "External id": 297283,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368736318.255, "dur": 6.814, + "args": { + "External id": 297284,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368736352.299, "dur": 403.027, + "args": { + "External id": 297285,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368736375.999, "dur": 374.229, + "args": { + "External id": 297286,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3909, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368736386.246, "dur": 354.473, + "args": { + "External id": 297287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368736774.801, "dur": 2.437, + "args": { + "External id": 297288,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3911, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736835.880, "dur": 6.546, + "args": { + "External id": 297289,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736887.924, "dur": 1.367, + "args": { + "External id": 297290,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736902.890, "dur": 1.651, + "args": { + "External id": 297291,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736914.361, "dur": 1.012, + "args": { + "External id": 297292,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736928.644, "dur": 0.955, + "args": { + "External id": 297293,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736938.972, "dur": 1.047, + "args": { + "External id": 297294,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736949.422, "dur": 1.062, + "args": { + "External id": 297295,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736959.977, "dur": 0.982, + "args": { + "External id": 297296,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368736969.898, "dur": 0.936, + "args": { + "External id": 297297,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368737067.120, "dur": 2691.436, + "args": { + "External id": 297298,"Record function id": 0, "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2070554, "tid": 2107619, + "ts": 5333368737085.921, "dur": 990.501, + "args": { + "External id": 297299,"Record function id": 0, "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070554, "tid": 2107619, + "ts": 5333368737099.470, "dur": 316.690, + "args": { + "External id": 297300,"Record function id": 0, "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737191.765, "dur": 5.089, + "args": { + "External id": 297301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737200.280, "dur": 0.857, + "args": { + "External id": 297302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737203.150, "dur": 0.692, + "args": { + "External id": 297303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737205.553, "dur": 1.000, + "args": { + "External id": 297304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737207.889, "dur": 0.687, + "args": { + "External id": 297305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737210.272, "dur": 0.964, + "args": { + "External id": 297306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737212.616, "dur": 0.829, + "args": { + "External id": 297307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737215.301, "dur": 0.788, + "args": { + "External id": 297308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737217.441, "dur": 0.758, + "args": { + "External id": 297309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368737219.565, "dur": 0.931, + "args": { + "External id": 297310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368737239.287, "dur": 144.484, + "args": { + "External id": 297311,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368737254.732, "dur": 124.819, + "args": { + "External id": 297312,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368737266.777, "dur": 12.042, + "args": { + "External id": 297313,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368737282.592, "dur": 67.694, + "args": { + "External id": 297314,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368737285.558, "dur": 64.459, + "args": { + "External id": 297315,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737289.236, "dur": 6.248, + "args": { + "External id": 297316,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368737297.033, "dur": 52.264, + "args": { + "External id": 297317,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2070554, "tid": 2107619, + "ts": 5333368737497.647, "dur": 572.013, + "args": { + "External id": 297318,"Record function id": 0, "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070554, "tid": 2107619, + "ts": 5333368737513.083, "dur": 544.979, + "args": { + "External id": 297319,"Record function id": 0, "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368737570.324, "dur": 4.463, + "args": { + "External id": 297320,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368737588.698, "dur": 28.419, + "args": { + "External id": 297321,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737593.175, "dur": 1.366, + "args": { + "External id": 297322,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737596.671, "dur": 0.535, + "args": { + "External id": 297323,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737602.177, "dur": 0.526, + "args": { + "External id": 297324,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737604.126, "dur": 0.586, + "args": { + "External id": 297325,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737605.866, "dur": 0.867, + "args": { + "External id": 297326,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737607.767, "dur": 0.782, + "args": { + "External id": 297327,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737610.090, "dur": 0.793, + "args": { + "External id": 297328,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737611.800, "dur": 0.797, + "args": { + "External id": 297329,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737613.735, "dur": 0.824, + "args": { + "External id": 297330,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368737664.824, "dur": 34.798, + "args": { + "External id": 297331,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368737735.225, "dur": 104.083, + "args": { + "External id": 297332,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368737745.751, "dur": 4.259, + "args": { + "External id": 297333,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368737754.905, "dur": 10.674, + "args": { + "External id": 297334,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368737759.000, "dur": 6.149, + "args": { + "External id": 297335,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737762.551, "dur": 1.061, + "args": { + "External id": 297336,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368737772.584, "dur": 24.889, + "args": { + "External id": 297337,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737774.580, "dur": 0.684, + "args": { + "External id": 297338,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737776.644, "dur": 0.722, + "args": { + "External id": 297339,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737778.650, "dur": 0.508, + "args": { + "External id": 297340,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737780.674, "dur": 0.411, + "args": { + "External id": 297341,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737782.523, "dur": 0.641, + "args": { + "External id": 297342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737784.798, "dur": 0.764, + "args": { + "External id": 297343,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737786.962, "dur": 0.543, + "args": { + "External id": 297344,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737788.557, "dur": 0.656, + "args": { + "External id": 297345,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368737790.626, "dur": 0.634, + "args": { + "External id": 297346,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368737807.330, "dur": 24.557, + "args": { + "External id": 297347,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368737883.980, "dur": 111.609, + "args": { + "External id": 297348,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368737906.630, "dur": 85.621, + "args": { + "External id": 297349,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3972, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368737916.508, "dur": 71.438, + "args": { + "External id": 297350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368738009.666, "dur": 1.874, + "args": { + "External id": 297351,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3974, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368738083.055, "dur": 1655.536, + "args": { + "External id": 297352,"Sequence number": 1209172, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3975 + } + }, + { + "ph": "f", "id": 60, "pid": 2070554, "tid": 2107619, "ts": 5333368738083.055, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738212.172, "dur": 110.216, + "args": { + "External id": 297353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368738363.207, "dur": 37.957, + "args": { + "External id": 297354,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738416.278, "dur": 50.081, + "args": { + "External id": 297355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738475.703, "dur": 33.814, + "args": { + "External id": 297356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738515.830, "dur": 44.857, + "args": { + "External id": 297357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738567.356, "dur": 30.322, + "args": { + "External id": 297358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738604.710, "dur": 82.656, + "args": { + "External id": 297359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368738716.967, "dur": 25.325, + "args": { + "External id": 297360,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368738759.665, "dur": 32.242, + "args": { + "External id": 297361,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368738810.216, "dur": 19.281, + "args": { + "External id": 297362,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368738841.465, "dur": 15.331, + "args": { + "External id": 297363,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738867.939, "dur": 34.690, + "args": { + "External id": 297364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368738905.691, "dur": 34.085, + "args": { + "External id": 297365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368738967.261, "dur": 171.393, + "args": { + "External id": 297366,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368739040.779, "dur": 9.710, + "args": { + "External id": 297367,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368739052.474, "dur": 3.433, + "args": { + "External id": 297368,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368739184.760, "dur": 32.864, + "args": { + "External id": 297369,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368739230.375, "dur": 15.893, + "args": { + "External id": 297370,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368739258.941, "dur": 40.982, + "args": { + "External id": 297371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368739306.400, "dur": 36.092, + "args": { + "External id": 297372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368739349.306, "dur": 22.043, + "args": { + "External id": 297373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368739376.111, "dur": 30.728, + "args": { + "External id": 297374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368739412.218, "dur": 21.215, + "args": { + "External id": 297375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368739439.573, "dur": 29.113, + "args": { + "External id": 297376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368739485.016, "dur": 22.030, + "args": { + "External id": 297377,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368739537.074, "dur": 31.060, + "args": { + "External id": 297378,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368739586.851, "dur": 16.964, + "args": { + "External id": 297379,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368739618.122, "dur": 55.099, + "args": { + "External id": 297380,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368739689.503, "dur": 16.896, + "args": { + "External id": 297381,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739781.093, "dur": 15.027, + "args": { + "External id": 297382,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739784.524, "dur": 10.752, + "args": { + "External id": 297383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739788.539, "dur": 5.793, + "args": { + "External id": 297384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739790.143, "dur": 4.070, + "args": { + "External id": 297385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739800.010, "dur": 5.127, + "args": { + "External id": 297386,"Record function id": 0, "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739801.637, "dur": 3.071, + "args": { + "External id": 297387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739802.415, "dur": 1.835, + "args": { + "External id": 297388,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739802.962, "dur": 1.221, + "args": { + "External id": 297389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739808.399, "dur": 3.597, + "args": { + "External id": 297390,"Record function id": 0, "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739809.345, "dur": 2.233, + "args": { + "External id": 297391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739809.903, "dur": 1.217, + "args": { + "External id": 297392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739810.327, "dur": 0.714, + "args": { + "External id": 297393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739815.259, "dur": 3.616, + "args": { + "External id": 297394,"Record function id": 0, "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739816.215, "dur": 2.236, + "args": { + "External id": 297395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739816.737, "dur": 1.288, + "args": { + "External id": 297396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739817.251, "dur": 0.691, + "args": { + "External id": 297397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739822.009, "dur": 3.621, + "args": { + "External id": 297398,"Record function id": 0, "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739822.958, "dur": 2.238, + "args": { + "External id": 297399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739823.498, "dur": 1.258, + "args": { + "External id": 297400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739823.877, "dur": 0.806, + "args": { + "External id": 297401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739828.731, "dur": 4.232, + "args": { + "External id": 297402,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739829.998, "dur": 2.546, + "args": { + "External id": 297403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739830.635, "dur": 1.488, + "args": { + "External id": 297404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739831.183, "dur": 0.868, + "args": { + "External id": 297405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739836.260, "dur": 3.995, + "args": { + "External id": 297406,"Record function id": 0, "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739837.255, "dur": 2.529, + "args": { + "External id": 297407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739837.971, "dur": 1.408, + "args": { + "External id": 297408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739838.384, "dur": 0.921, + "args": { + "External id": 297409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739843.541, "dur": 3.522, + "args": { + "External id": 297410,"Record function id": 0, "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739844.504, "dur": 2.132, + "args": { + "External id": 297411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739844.976, "dur": 1.250, + "args": { + "External id": 297412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739845.316, "dur": 0.838, + "args": { + "External id": 297413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739850.136, "dur": 3.468, + "args": { + "External id": 297414,"Record function id": 0, "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368739851.182, "dur": 1.989, + "args": { + "External id": 297415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739851.658, "dur": 1.101, + "args": { + "External id": 297416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368739852.014, "dur": 0.680, + "args": { + "External id": 297417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368739857.438, "dur": 37512.433, + "args": { + "External id": 297418,"Record function id": 0, "Sequence number": 1209171, "Fwd thread id": 1, "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368739858.878, "dur": 37501.518, + "args": { + "External id": 297419,"Sequence number": 1209171, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4042 + } + }, + { + "ph": "f", "id": 61, "pid": 2070554, "tid": 2107619, "ts": 5333368739858.878, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2070554, "tid": 2107619, + "ts": 5333368739888.379, "dur": 37.368, + "args": { + "External id": 297420,"Record function id": 0, "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2070554, "tid": 2107619, + "ts": 5333368739933.383, "dur": 63.123, + "args": { + "External id": 297421,"Record function id": 0, "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2070554, "tid": 2107619, + "ts": 5333368740002.700, "dur": 37349.942, + "args": { + "External id": 297422,"Record function id": 0, "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368740086.807, "dur": 6.606, + "args": { + "External id": 297423,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368740102.476, "dur": 4.688, + "args": { + "External id": 297424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368740120.609, "dur": 36406.941, + "args": { + "External id": 297425,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368740134.530, "dur": 36384.106, + "args": { + "External id": 297426,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368740200.573, "dur": 14.619, + "args": { + "External id": 297427,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368740221.617, "dur": 36259.539, + "args": { + "External id": 297428,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368740224.284, "dur": 36256.329, + "args": { + "External id": 297429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368740228.278, "dur": 5.867, + "args": { + "External id": 297430,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368740235.875, "dur": 36241.323, + "args": { + "External id": 297431,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368776613.645, "dur": 32.819, + "args": { + "External id": 297432,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368776616.333, "dur": 29.479, + "args": { + "External id": 297433,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368776675.335, "dur": 375.846, + "args": { + "External id": 297434,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368776700.184, "dur": 346.066, + "args": { + "External id": 297435,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4058, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368776711.246, "dur": 329.558, + "args": { + "External id": 297436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368777070.940, "dur": 2.319, + "args": { + "External id": 297437,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4060, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777135.432, "dur": 6.675, + "args": { + "External id": 297438,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777205.298, "dur": 1.993, + "args": { + "External id": 297439,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777223.230, "dur": 1.446, + "args": { + "External id": 297440,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777236.619, "dur": 0.859, + "args": { + "External id": 297441,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777248.720, "dur": 1.144, + "args": { + "External id": 297442,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777259.327, "dur": 0.726, + "args": { + "External id": 297443,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777269.542, "dur": 1.000, + "args": { + "External id": 297444,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777280.869, "dur": 1.209, + "args": { + "External id": 297445,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777291.885, "dur": 1.034, + "args": { + "External id": 297446,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368777385.283, "dur": 2656.094, + "args": { + "External id": 297447,"Record function id": 0, "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2070554, "tid": 2107619, + "ts": 5333368777404.738, "dur": 1016.581, + "args": { + "External id": 297448,"Record function id": 0, "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070554, "tid": 2107619, + "ts": 5333368777421.749, "dur": 348.994, + "args": { + "External id": 297449,"Record function id": 0, "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777500.242, "dur": 4.055, + "args": { + "External id": 297450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777507.637, "dur": 0.831, + "args": { + "External id": 297451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777510.108, "dur": 0.971, + "args": { + "External id": 297452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777512.814, "dur": 0.814, + "args": { + "External id": 297453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777515.219, "dur": 1.051, + "args": { + "External id": 297454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777517.908, "dur": 0.928, + "args": { + "External id": 297455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777520.528, "dur": 0.968, + "args": { + "External id": 297456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777522.754, "dur": 0.843, + "args": { + "External id": 297457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777525.543, "dur": 1.288, + "args": { + "External id": 297458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368777528.326, "dur": 1.124, + "args": { + "External id": 297459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368777548.215, "dur": 185.774, + "args": { + "External id": 297460,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368777563.419, "dur": 165.417, + "args": { + "External id": 297461,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368777576.786, "dur": 11.957, + "args": { + "External id": 297462,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368777592.233, "dur": 105.782, + "args": { + "External id": 297463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368777594.842, "dur": 102.790, + "args": { + "External id": 297464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777598.472, "dur": 6.288, + "args": { + "External id": 297465,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368777606.199, "dur": 90.327, + "args": { + "External id": 297466,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2070554, "tid": 2107619, + "ts": 5333368777856.256, "dur": 558.042, + "args": { + "External id": 297467,"Record function id": 0, "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070554, "tid": 2107619, + "ts": 5333368777872.540, "dur": 529.925, + "args": { + "External id": 297468,"Record function id": 0, "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368777932.590, "dur": 5.887, + "args": { + "External id": 297469,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368777952.971, "dur": 37.324, + "args": { + "External id": 297470,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777960.449, "dur": 1.893, + "args": { + "External id": 297471,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777963.788, "dur": 0.659, + "args": { + "External id": 297472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777973.987, "dur": 0.499, + "args": { + "External id": 297473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777975.267, "dur": 0.860, + "args": { + "External id": 297474,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777977.515, "dur": 0.391, + "args": { + "External id": 297475,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777978.968, "dur": 0.434, + "args": { + "External id": 297476,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777980.602, "dur": 0.369, + "args": { + "External id": 297477,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777982.062, "dur": 0.648, + "args": { + "External id": 297478,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368777983.906, "dur": 0.652, + "args": { + "External id": 297479,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368778000.447, "dur": 31.676, + "args": { + "External id": 297480,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368778066.041, "dur": 94.961, + "args": { + "External id": 297481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368778075.892, "dur": 3.515, + "args": { + "External id": 297482,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368778083.879, "dur": 9.011, + "args": { + "External id": 297483,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368778087.627, "dur": 4.794, + "args": { + "External id": 297484,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778090.639, "dur": 0.487, + "args": { + "External id": 297485,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368778099.254, "dur": 26.342, + "args": { + "External id": 297486,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778101.083, "dur": 0.402, + "args": { + "External id": 297487,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778102.872, "dur": 0.678, + "args": { + "External id": 297488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778104.652, "dur": 0.628, + "args": { + "External id": 297489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778109.291, "dur": 0.533, + "args": { + "External id": 297490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778110.990, "dur": 0.589, + "args": { + "External id": 297491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778112.993, "dur": 0.588, + "args": { + "External id": 297492,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778114.437, "dur": 0.579, + "args": { + "External id": 297493,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778116.021, "dur": 0.576, + "args": { + "External id": 297494,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368778117.971, "dur": 0.713, + "args": { + "External id": 297495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368778134.523, "dur": 19.639, + "args": { + "External id": 297496,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368778222.657, "dur": 113.848, + "args": { + "External id": 297497,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368778244.737, "dur": 88.261, + "args": { + "External id": 297498,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4121, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368778254.482, "dur": 74.246, + "args": { + "External id": 297499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368778349.889, "dur": 1.717, + "args": { + "External id": 297500,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4123, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368778431.918, "dur": 1590.462, + "args": { + "External id": 297501,"Sequence number": 1209170, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4124 + } + }, + { + "ph": "f", "id": 62, "pid": 2070554, "tid": 2107619, "ts": 5333368778431.918, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368778537.155, "dur": 144.610, + "args": { + "External id": 297502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368778724.769, "dur": 38.412, + "args": { + "External id": 297503,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368778781.364, "dur": 55.807, + "args": { + "External id": 297504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368778847.035, "dur": 35.117, + "args": { + "External id": 297505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368778887.960, "dur": 45.034, + "args": { + "External id": 297506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368778939.231, "dur": 27.533, + "args": { + "External id": 297507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368778973.058, "dur": 41.606, + "args": { + "External id": 297508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368779034.626, "dur": 25.919, + "args": { + "External id": 297509,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368779076.821, "dur": 31.384, + "args": { + "External id": 297510,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368779125.845, "dur": 19.049, + "args": { + "External id": 297511,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368779156.598, "dur": 31.992, + "args": { + "External id": 297512,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779199.023, "dur": 35.141, + "args": { + "External id": 297513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779237.184, "dur": 33.370, + "args": { + "External id": 297514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368779295.971, "dur": 165.858, + "args": { + "External id": 297515,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368779372.590, "dur": 5.851, + "args": { + "External id": 297516,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368779380.422, "dur": 2.846, + "args": { + "External id": 297517,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368779489.709, "dur": 25.768, + "args": { + "External id": 297518,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368779525.326, "dur": 15.331, + "args": { + "External id": 297519,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779547.620, "dur": 37.351, + "args": { + "External id": 297520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779590.358, "dur": 72.813, + "args": { + "External id": 297521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779673.571, "dur": 27.440, + "args": { + "External id": 297522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779705.301, "dur": 34.177, + "args": { + "External id": 297523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779745.236, "dur": 21.687, + "args": { + "External id": 297524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368779773.136, "dur": 31.425, + "args": { + "External id": 297525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368779821.847, "dur": 37.196, + "args": { + "External id": 297526,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368779881.515, "dur": 26.466, + "args": { + "External id": 297527,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368779920.523, "dur": 17.560, + "args": { + "External id": 297528,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368779951.725, "dur": 14.489, + "args": { + "External id": 297529,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368779976.741, "dur": 15.508, + "args": { + "External id": 297530,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780063.506, "dur": 16.962, + "args": { + "External id": 297531,"Record function id": 0, "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780069.664, "dur": 9.857, + "args": { + "External id": 297532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780073.381, "dur": 5.233, + "args": { + "External id": 297533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780074.595, "dur": 3.921, + "args": { + "External id": 297534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780084.363, "dur": 3.649, + "args": { + "External id": 297535,"Record function id": 0, "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780085.499, "dur": 2.036, + "args": { + "External id": 297536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780086.072, "dur": 1.011, + "args": { + "External id": 297537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780086.433, "dur": 0.579, + "args": { + "External id": 297538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780091.296, "dur": 4.103, + "args": { + "External id": 297539,"Record function id": 0, "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780092.468, "dur": 2.495, + "args": { + "External id": 297540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780093.064, "dur": 1.437, + "args": { + "External id": 297541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780093.481, "dur": 0.935, + "args": { + "External id": 297542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780098.651, "dur": 3.628, + "args": { + "External id": 297543,"Record function id": 0, "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780099.710, "dur": 2.156, + "args": { + "External id": 297544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780100.243, "dur": 1.186, + "args": { + "External id": 297545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780100.547, "dur": 0.800, + "args": { + "External id": 297546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780105.479, "dur": 3.785, + "args": { + "External id": 297547,"Record function id": 0, "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780106.389, "dur": 2.444, + "args": { + "External id": 297548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780106.937, "dur": 1.487, + "args": { + "External id": 297549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780107.379, "dur": 0.934, + "args": { + "External id": 297550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780112.403, "dur": 3.384, + "args": { + "External id": 297551,"Record function id": 0, "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780113.412, "dur": 1.962, + "args": { + "External id": 297552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780113.958, "dur": 0.974, + "args": { + "External id": 297553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780114.233, "dur": 0.595, + "args": { + "External id": 297554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780118.983, "dur": 3.741, + "args": { + "External id": 297555,"Record function id": 0, "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780120.257, "dur": 2.004, + "args": { + "External id": 297556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780120.718, "dur": 1.123, + "args": { + "External id": 297557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780121.066, "dur": 0.702, + "args": { + "External id": 297558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780125.838, "dur": 3.330, + "args": { + "External id": 297559,"Record function id": 0, "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780126.710, "dur": 2.046, + "args": { + "External id": 297560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780127.301, "dur": 1.036, + "args": { + "External id": 297561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780127.634, "dur": 0.638, + "args": { + "External id": 297562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780132.202, "dur": 3.343, + "args": { + "External id": 297563,"Record function id": 0, "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368780133.334, "dur": 1.799, + "args": { + "External id": 297564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780133.805, "dur": 0.920, + "args": { + "External id": 297565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368780134.104, "dur": 0.554, + "args": { + "External id": 297566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368780139.482, "dur": 37678.251, + "args": { + "External id": 297567,"Record function id": 0, "Sequence number": 1209169, "Fwd thread id": 1, "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368780140.653, "dur": 37668.211, + "args": { + "External id": 297568,"Sequence number": 1209169, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4191 + } + }, + { + "ph": "f", "id": 63, "pid": 2070554, "tid": 2107619, "ts": 5333368780140.653, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2070554, "tid": 2107619, + "ts": 5333368780184.191, "dur": 37.438, + "args": { + "External id": 297569,"Record function id": 0, "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2070554, "tid": 2107619, + "ts": 5333368780230.219, "dur": 60.158, + "args": { + "External id": 297570,"Record function id": 0, "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2070554, "tid": 2107619, + "ts": 5333368780296.068, "dur": 37505.321, + "args": { + "External id": 297571,"Record function id": 0, "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368780383.165, "dur": 6.980, + "args": { + "External id": 297572,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368780399.836, "dur": 5.203, + "args": { + "External id": 297573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368780418.705, "dur": 36526.931, + "args": { + "External id": 297574,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368780431.023, "dur": 36506.633, + "args": { + "External id": 297575,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368780471.807, "dur": 13.868, + "args": { + "External id": 297576,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368780491.848, "dur": 36405.641, + "args": { + "External id": 297577,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368780494.209, "dur": 36402.596, + "args": { + "External id": 297578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368780497.959, "dur": 5.226, + "args": { + "External id": 297579,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368780504.727, "dur": 36388.830, + "args": { + "External id": 297580,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368817026.838, "dur": 8.241, + "args": { + "External id": 297581,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368817029.216, "dur": 5.557, + "args": { + "External id": 297582,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368817062.175, "dur": 410.474, + "args": { + "External id": 297583,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368817084.560, "dur": 382.715, + "args": { + "External id": 297584,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4207, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368817095.111, "dur": 366.138, + "args": { + "External id": 297585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368817492.412, "dur": 2.401, + "args": { + "External id": 297586,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4209, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817562.288, "dur": 6.227, + "args": { + "External id": 297587,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817612.501, "dur": 1.111, + "args": { + "External id": 297588,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817664.799, "dur": 1.880, + "args": { + "External id": 297589,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817680.779, "dur": 0.875, + "args": { + "External id": 297590,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817691.299, "dur": 0.820, + "args": { + "External id": 297591,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817700.718, "dur": 0.776, + "args": { + "External id": 297592,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817711.065, "dur": 0.766, + "args": { + "External id": 297593,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817721.618, "dur": 1.207, + "args": { + "External id": 297594,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368817736.970, "dur": 0.773, + "args": { + "External id": 297595,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368817832.357, "dur": 2514.512, + "args": { + "External id": 297596,"Record function id": 0, "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2070554, "tid": 2107619, + "ts": 5333368817853.585, "dur": 929.678, + "args": { + "External id": 297597,"Record function id": 0, "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070554, "tid": 2107619, + "ts": 5333368817867.624, "dur": 287.871, + "args": { + "External id": 297598,"Record function id": 0, "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817942.737, "dur": 4.590, + "args": { + "External id": 297599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817950.336, "dur": 0.742, + "args": { + "External id": 297600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817952.609, "dur": 0.704, + "args": { + "External id": 297601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817954.725, "dur": 0.651, + "args": { + "External id": 297602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817956.800, "dur": 0.696, + "args": { + "External id": 297603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817959.176, "dur": 0.713, + "args": { + "External id": 297604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817961.216, "dur": 0.905, + "args": { + "External id": 297605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817963.596, "dur": 0.529, + "args": { + "External id": 297606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817965.403, "dur": 1.266, + "args": { + "External id": 297607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368817968.111, "dur": 1.004, + "args": { + "External id": 297608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368817987.031, "dur": 141.405, + "args": { + "External id": 297609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368818001.216, "dur": 123.006, + "args": { + "External id": 297610,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368818013.430, "dur": 11.948, + "args": { + "External id": 297611,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368818028.882, "dur": 66.780, + "args": { + "External id": 297612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368818031.011, "dur": 64.354, + "args": { + "External id": 297613,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818034.322, "dur": 5.304, + "args": { + "External id": 297614,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368818041.138, "dur": 53.586, + "args": { + "External id": 297615,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2070554, "tid": 2107619, + "ts": 5333368818247.271, "dur": 528.668, + "args": { + "External id": 297616,"Record function id": 0, "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070554, "tid": 2107619, + "ts": 5333368818262.602, "dur": 501.300, + "args": { + "External id": 297617,"Record function id": 0, "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368818319.205, "dur": 6.093, + "args": { + "External id": 297618,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368818339.474, "dur": 17.691, + "args": { + "External id": 297619,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818343.810, "dur": 1.407, + "args": { + "External id": 297620,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818346.590, "dur": 0.717, + "args": { + "External id": 297621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818348.057, "dur": 0.258, + "args": { + "External id": 297622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818348.996, "dur": 0.288, + "args": { + "External id": 297623,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818350.145, "dur": 0.228, + "args": { + "External id": 297624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818351.180, "dur": 0.288, + "args": { + "External id": 297625,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818352.093, "dur": 0.259, + "args": { + "External id": 297626,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818353.137, "dur": 0.404, + "args": { + "External id": 297627,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818354.133, "dur": 0.409, + "args": { + "External id": 297628,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368818366.337, "dur": 32.385, + "args": { + "External id": 297629,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368818428.633, "dur": 83.173, + "args": { + "External id": 297630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368818437.921, "dur": 3.007, + "args": { + "External id": 297631,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368818445.651, "dur": 8.682, + "args": { + "External id": 297632,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368818449.363, "dur": 4.505, + "args": { + "External id": 297633,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818452.347, "dur": 0.318, + "args": { + "External id": 297634,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368818460.731, "dur": 16.207, + "args": { + "External id": 297635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818461.975, "dur": 0.468, + "args": { + "External id": 297636,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818463.432, "dur": 0.509, + "args": { + "External id": 297637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818464.661, "dur": 0.444, + "args": { + "External id": 297638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818465.776, "dur": 0.341, + "args": { + "External id": 297639,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818466.799, "dur": 0.285, + "args": { + "External id": 297640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818467.628, "dur": 0.273, + "args": { + "External id": 297641,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818468.643, "dur": 0.282, + "args": { + "External id": 297642,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818469.513, "dur": 0.320, + "args": { + "External id": 297643,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368818470.695, "dur": 0.264, + "args": { + "External id": 297644,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368818485.057, "dur": 19.886, + "args": { + "External id": 297645,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368818552.028, "dur": 145.495, + "args": { + "External id": 297646,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368818571.054, "dur": 122.740, + "args": { + "External id": 297647,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4270, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368818580.334, "dur": 109.129, + "args": { + "External id": 297648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368818712.268, "dur": 1.975, + "args": { + "External id": 297649,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4272, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368818790.072, "dur": 1537.806, + "args": { + "External id": 297650,"Sequence number": 1209168, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4273 + } + }, + { + "ph": "f", "id": 64, "pid": 2070554, "tid": 2107619, "ts": 5333368818790.072, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368818892.753, "dur": 103.781, + "args": { + "External id": 297651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368819030.185, "dur": 37.100, + "args": { + "External id": 297652,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819081.768, "dur": 54.362, + "args": { + "External id": 297653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819145.719, "dur": 53.195, + "args": { + "External id": 297654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819208.722, "dur": 50.766, + "args": { + "External id": 297655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819265.805, "dur": 28.983, + "args": { + "External id": 297656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819301.867, "dur": 41.175, + "args": { + "External id": 297657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368819364.303, "dur": 28.065, + "args": { + "External id": 297658,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368819408.816, "dur": 31.821, + "args": { + "External id": 297659,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368819457.940, "dur": 18.984, + "args": { + "External id": 297660,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368819488.479, "dur": 15.658, + "args": { + "External id": 297661,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819511.927, "dur": 30.271, + "args": { + "External id": 297662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819545.245, "dur": 33.771, + "args": { + "External id": 297663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368819604.794, "dur": 205.492, + "args": { + "External id": 297664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368819717.979, "dur": 6.358, + "args": { + "External id": 297665,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368819726.733, "dur": 2.438, + "args": { + "External id": 297666,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368819839.133, "dur": 23.356, + "args": { + "External id": 297667,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368819872.380, "dur": 13.489, + "args": { + "External id": 297668,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819893.595, "dur": 38.702, + "args": { + "External id": 297669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819938.903, "dur": 34.960, + "args": { + "External id": 297670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368819979.562, "dur": 19.819, + "args": { + "External id": 297671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368820003.986, "dur": 29.452, + "args": { + "External id": 297672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368820038.909, "dur": 18.554, + "args": { + "External id": 297673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368820062.348, "dur": 29.437, + "args": { + "External id": 297674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368820106.672, "dur": 20.501, + "args": { + "External id": 297675,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368820142.746, "dur": 22.066, + "args": { + "External id": 297676,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368820196.833, "dur": 20.328, + "args": { + "External id": 297677,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368820230.940, "dur": 27.281, + "args": { + "External id": 297678,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368820277.416, "dur": 20.833, + "args": { + "External id": 297679,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820371.918, "dur": 14.426, + "args": { + "External id": 297680,"Record function id": 0, "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820374.713, "dur": 10.452, + "args": { + "External id": 297681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820378.855, "dur": 5.352, + "args": { + "External id": 297682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820380.209, "dur": 3.908, + "args": { + "External id": 297683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820390.129, "dur": 4.041, + "args": { + "External id": 297684,"Record function id": 0, "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820391.303, "dur": 2.439, + "args": { + "External id": 297685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820391.969, "dur": 1.312, + "args": { + "External id": 297686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820392.510, "dur": 0.702, + "args": { + "External id": 297687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820397.500, "dur": 4.068, + "args": { + "External id": 297688,"Record function id": 0, "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820398.680, "dur": 2.444, + "args": { + "External id": 297689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820399.230, "dur": 1.498, + "args": { + "External id": 297690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820399.628, "dur": 1.007, + "args": { + "External id": 297691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820404.814, "dur": 4.046, + "args": { + "External id": 297692,"Record function id": 0, "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820405.920, "dur": 2.501, + "args": { + "External id": 297693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820406.696, "dur": 1.330, + "args": { + "External id": 297694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820407.030, "dur": 0.911, + "args": { + "External id": 297695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820412.014, "dur": 4.210, + "args": { + "External id": 297696,"Record function id": 0, "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820413.184, "dur": 2.562, + "args": { + "External id": 297697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820413.850, "dur": 1.511, + "args": { + "External id": 297698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820414.278, "dur": 1.011, + "args": { + "External id": 297699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820419.391, "dur": 6.862, + "args": { + "External id": 297700,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820420.387, "dur": 5.434, + "args": { + "External id": 297701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820421.148, "dur": 1.055, + "args": { + "External id": 297702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820421.524, "dur": 0.616, + "args": { + "External id": 297703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820429.453, "dur": 3.233, + "args": { + "External id": 297704,"Record function id": 0, "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820430.418, "dur": 1.806, + "args": { + "External id": 297705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820430.885, "dur": 0.943, + "args": { + "External id": 297706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820431.211, "dur": 0.551, + "args": { + "External id": 297707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820435.753, "dur": 3.289, + "args": { + "External id": 297708,"Record function id": 0, "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820436.692, "dur": 1.926, + "args": { + "External id": 297709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820437.173, "dur": 1.049, + "args": { + "External id": 297710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820437.555, "dur": 0.595, + "args": { + "External id": 297711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820442.066, "dur": 3.346, + "args": { + "External id": 297712,"Record function id": 0, "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368820443.035, "dur": 1.942, + "args": { + "External id": 297713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820443.495, "dur": 1.077, + "args": { + "External id": 297714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368820443.943, "dur": 0.564, + "args": { + "External id": 297715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368820448.886, "dur": 36921.775, + "args": { + "External id": 297716,"Record function id": 0, "Sequence number": 1209167, "Fwd thread id": 1, "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368820450.105, "dur": 36912.008, + "args": { + "External id": 297717,"Sequence number": 1209167, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4340 + } + }, + { + "ph": "f", "id": 65, "pid": 2070554, "tid": 2107619, "ts": 5333368820450.105, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2070554, "tid": 2107619, + "ts": 5333368820478.139, "dur": 36.282, + "args": { + "External id": 297718,"Record function id": 0, "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2070554, "tid": 2107619, + "ts": 5333368820521.903, "dur": 56.846, + "args": { + "External id": 297719,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2070554, "tid": 2107619, + "ts": 5333368820584.595, "dur": 36770.059, + "args": { + "External id": 297720,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368820707.534, "dur": 7.131, + "args": { + "External id": 297721,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368820728.333, "dur": 4.984, + "args": { + "External id": 297722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368820747.432, "dur": 35934.843, + "args": { + "External id": 297723,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368820759.904, "dur": 35914.860, + "args": { + "External id": 297724,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368820796.186, "dur": 13.223, + "args": { + "External id": 297725,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368820815.521, "dur": 35801.471, + "args": { + "External id": 297726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368820818.074, "dur": 35798.451, + "args": { + "External id": 297727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368820821.720, "dur": 4.990, + "args": { + "External id": 297728,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368820828.507, "dur": 35784.813, + "args": { + "External id": 297729,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368856761.639, "dur": 7.686, + "args": { + "External id": 297730,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368856763.875, "dur": 5.107, + "args": { + "External id": 297731,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368856796.140, "dur": 276.098, + "args": { + "External id": 297732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368856819.205, "dur": 248.557, + "args": { + "External id": 297733,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4356, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368856829.829, "dur": 232.743, + "args": { + "External id": 297734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368857086.654, "dur": 2.184, + "args": { + "External id": 297735,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4358, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857140.355, "dur": 5.976, + "args": { + "External id": 297736,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857205.929, "dur": 2.111, + "args": { + "External id": 297737,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857223.743, "dur": 1.416, + "args": { + "External id": 297738,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857236.015, "dur": 1.030, + "args": { + "External id": 297739,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857246.677, "dur": 0.844, + "args": { + "External id": 297740,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857256.968, "dur": 0.932, + "args": { + "External id": 297741,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857266.935, "dur": 1.159, + "args": { + "External id": 297742,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857278.240, "dur": 1.468, + "args": { + "External id": 297743,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857291.946, "dur": 1.129, + "args": { + "External id": 297744,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368857385.725, "dur": 2607.211, + "args": { + "External id": 297745,"Record function id": 0, "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2070554, "tid": 2107619, + "ts": 5333368857407.836, "dur": 989.812, + "args": { + "External id": 297746,"Record function id": 0, "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070554, "tid": 2107619, + "ts": 5333368857421.320, "dur": 347.042, + "args": { + "External id": 297747,"Record function id": 0, "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857500.768, "dur": 3.875, + "args": { + "External id": 297748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857507.593, "dur": 1.064, + "args": { + "External id": 297749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857510.320, "dur": 1.149, + "args": { + "External id": 297750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857513.073, "dur": 1.113, + "args": { + "External id": 297751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857515.552, "dur": 1.414, + "args": { + "External id": 297752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857518.274, "dur": 1.460, + "args": { + "External id": 297753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857521.109, "dur": 1.384, + "args": { + "External id": 297754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857523.808, "dur": 1.479, + "args": { + "External id": 297755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857526.866, "dur": 1.233, + "args": { + "External id": 297756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368857529.324, "dur": 1.708, + "args": { + "External id": 297757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368857548.334, "dur": 189.551, + "args": { + "External id": 297758,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368857562.570, "dur": 170.628, + "args": { + "External id": 297759,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368857573.878, "dur": 12.156, + "args": { + "External id": 297760,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368857592.663, "dur": 109.782, + "args": { + "External id": 297761,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368857594.921, "dur": 107.159, + "args": { + "External id": 297762,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857598.325, "dur": 5.430, + "args": { + "External id": 297763,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368857605.253, "dur": 95.620, + "args": { + "External id": 297764,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2070554, "tid": 2107619, + "ts": 5333368857848.627, "dur": 541.888, + "args": { + "External id": 297765,"Record function id": 0, "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070554, "tid": 2107619, + "ts": 5333368857864.716, "dur": 513.878, + "args": { + "External id": 297766,"Record function id": 0, "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368857928.037, "dur": 5.166, + "args": { + "External id": 297767,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368857948.385, "dur": 24.713, + "args": { + "External id": 297768,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857952.706, "dur": 1.767, + "args": { + "External id": 297769,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857955.742, "dur": 0.318, + "args": { + "External id": 297770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857957.683, "dur": 0.479, + "args": { + "External id": 297771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857959.797, "dur": 0.625, + "args": { + "External id": 297772,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857961.539, "dur": 0.658, + "args": { + "External id": 297773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857963.576, "dur": 0.304, + "args": { + "External id": 297774,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857965.365, "dur": 0.625, + "args": { + "External id": 297775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857967.261, "dur": 0.336, + "args": { + "External id": 297776,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368857969.156, "dur": 0.654, + "args": { + "External id": 297777,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368857983.768, "dur": 30.796, + "args": { + "External id": 297778,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368858045.637, "dur": 91.015, + "args": { + "External id": 297779,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368858055.039, "dur": 2.964, + "args": { + "External id": 297780,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368858062.640, "dur": 9.802, + "args": { + "External id": 297781,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368858066.441, "dur": 5.558, + "args": { + "External id": 297782,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858069.183, "dur": 0.784, + "args": { + "External id": 297783,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368858078.955, "dur": 22.617, + "args": { + "External id": 297784,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858080.975, "dur": 0.714, + "args": { + "External id": 297785,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858082.676, "dur": 0.521, + "args": { + "External id": 297786,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858083.881, "dur": 0.721, + "args": { + "External id": 297787,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858085.764, "dur": 0.291, + "args": { + "External id": 297788,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858087.286, "dur": 0.565, + "args": { + "External id": 297789,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858088.645, "dur": 0.890, + "args": { + "External id": 297790,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858091.016, "dur": 0.379, + "args": { + "External id": 297791,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858092.585, "dur": 0.614, + "args": { + "External id": 297792,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368858094.237, "dur": 0.487, + "args": { + "External id": 297793,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368858110.336, "dur": 19.700, + "args": { + "External id": 297794,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368858198.884, "dur": 116.407, + "args": { + "External id": 297795,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368858221.299, "dur": 90.578, + "args": { + "External id": 297796,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4419, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368858231.663, "dur": 76.130, + "args": { + "External id": 297797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368858327.908, "dur": 1.775, + "args": { + "External id": 297798,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4421, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368858404.914, "dur": 1568.849, + "args": { + "External id": 297799,"Sequence number": 1209166, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4422 + } + }, + { + "ph": "f", "id": 66, "pid": 2070554, "tid": 2107619, "ts": 5333368858404.914, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368858514.428, "dur": 101.731, + "args": { + "External id": 297800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368858694.838, "dur": 38.766, + "args": { + "External id": 297801,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368858751.054, "dur": 55.237, + "args": { + "External id": 297802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368858816.024, "dur": 32.881, + "args": { + "External id": 297803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368858855.332, "dur": 45.384, + "args": { + "External id": 297804,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368858907.215, "dur": 27.341, + "args": { + "External id": 297805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368858941.443, "dur": 41.646, + "args": { + "External id": 297806,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368859004.421, "dur": 23.225, + "args": { + "External id": 297807,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368859042.964, "dur": 26.797, + "args": { + "External id": 297808,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368859087.701, "dur": 17.398, + "args": { + "External id": 297809,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368859116.921, "dur": 13.954, + "args": { + "External id": 297810,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859138.674, "dur": 46.042, + "args": { + "External id": 297811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859189.690, "dur": 38.070, + "args": { + "External id": 297812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368859255.332, "dur": 162.365, + "args": { + "External id": 297813,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368859327.867, "dur": 6.074, + "args": { + "External id": 297814,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368859335.710, "dur": 2.665, + "args": { + "External id": 297815,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368859445.965, "dur": 24.497, + "args": { + "External id": 297816,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368859481.488, "dur": 16.896, + "args": { + "External id": 297817,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859505.820, "dur": 34.263, + "args": { + "External id": 297818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859545.831, "dur": 34.088, + "args": { + "External id": 297819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859585.916, "dur": 21.104, + "args": { + "External id": 297820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859611.221, "dur": 71.382, + "args": { + "External id": 297821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859692.609, "dur": 21.499, + "args": { + "External id": 297822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368859720.834, "dur": 28.832, + "args": { + "External id": 297823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368859766.671, "dur": 22.572, + "args": { + "External id": 297824,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368859805.769, "dur": 22.627, + "args": { + "External id": 297825,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368859851.995, "dur": 26.289, + "args": { + "External id": 297826,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368859898.576, "dur": 14.626, + "args": { + "External id": 297827,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368859925.409, "dur": 16.744, + "args": { + "External id": 297828,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860014.863, "dur": 17.351, + "args": { + "External id": 297829,"Record function id": 0, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860017.766, "dur": 13.556, + "args": { + "External id": 297830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860021.704, "dur": 8.816, + "args": { + "External id": 297831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860022.920, "dur": 7.488, + "args": { + "External id": 297832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860036.027, "dur": 4.578, + "args": { + "External id": 297833,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860037.538, "dur": 2.621, + "args": { + "External id": 297834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860038.093, "dur": 1.609, + "args": { + "External id": 297835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860038.442, "dur": 1.178, + "args": { + "External id": 297836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860043.877, "dur": 4.306, + "args": { + "External id": 297837,"Record function id": 0, "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860045.138, "dur": 2.587, + "args": { + "External id": 297838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860045.764, "dur": 1.504, + "args": { + "External id": 297839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860046.251, "dur": 0.930, + "args": { + "External id": 297840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860051.464, "dur": 3.848, + "args": { + "External id": 297841,"Record function id": 0, "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860052.568, "dur": 2.323, + "args": { + "External id": 297842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860053.106, "dur": 1.371, + "args": { + "External id": 297843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860053.411, "dur": 0.990, + "args": { + "External id": 297844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860058.548, "dur": 4.390, + "args": { + "External id": 297845,"Record function id": 0, "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860059.715, "dur": 2.773, + "args": { + "External id": 297846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860060.222, "dur": 1.853, + "args": { + "External id": 297847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860060.710, "dur": 1.300, + "args": { + "External id": 297848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860066.090, "dur": 3.961, + "args": { + "External id": 297849,"Record function id": 0, "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860067.311, "dur": 2.300, + "args": { + "External id": 297850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860067.785, "dur": 1.395, + "args": { + "External id": 297851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860068.172, "dur": 0.934, + "args": { + "External id": 297852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860073.316, "dur": 3.981, + "args": { + "External id": 297853,"Record function id": 0, "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860074.337, "dur": 2.531, + "args": { + "External id": 297854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860074.844, "dur": 1.593, + "args": { + "External id": 297855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860075.346, "dur": 1.020, + "args": { + "External id": 297856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860080.434, "dur": 6.885, + "args": { + "External id": 297857,"Record function id": 0, "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860081.513, "dur": 5.377, + "args": { + "External id": 297858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860082.070, "dur": 1.157, + "args": { + "External id": 297859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860082.399, "dur": 0.760, + "args": { + "External id": 297860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860090.439, "dur": 3.445, + "args": { + "External id": 297861,"Record function id": 0, "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368860091.495, "dur": 1.968, + "args": { + "External id": 297862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860091.966, "dur": 1.076, + "args": { + "External id": 297863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368860092.259, "dur": 0.711, + "args": { + "External id": 297864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368860097.785, "dur": 37914.591, + "args": { + "External id": 297865,"Record function id": 0, "Sequence number": 1209165, "Fwd thread id": 1, "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368860098.951, "dur": 37904.934, + "args": { + "External id": 297866,"Sequence number": 1209165, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4489 + } + }, + { + "ph": "f", "id": 67, "pid": 2070554, "tid": 2107619, "ts": 5333368860098.951, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2070554, "tid": 2107619, + "ts": 5333368860127.068, "dur": 55.509, + "args": { + "External id": 297867,"Record function id": 0, "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2070554, "tid": 2107619, + "ts": 5333368860193.211, "dur": 71.994, + "args": { + "External id": 297868,"Record function id": 0, "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2070554, "tid": 2107619, + "ts": 5333368860270.984, "dur": 37725.297, + "args": { + "External id": 297869,"Record function id": 0, "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368860355.146, "dur": 7.245, + "args": { + "External id": 297870,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368860372.558, "dur": 4.834, + "args": { + "External id": 297871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368860390.772, "dur": 36786.845, + "args": { + "External id": 297872,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368860403.482, "dur": 36755.525, + "args": { + "External id": 297873,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368860435.831, "dur": 13.365, + "args": { + "External id": 297874,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368860454.963, "dur": 36664.900, + "args": { + "External id": 297875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368860457.568, "dur": 36661.673, + "args": { + "External id": 297876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368860461.223, "dur": 5.395, + "args": { + "External id": 297877,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368860468.155, "dur": 36647.634, + "args": { + "External id": 297878,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368897268.518, "dur": 9.406, + "args": { + "External id": 297879,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368897271.227, "dur": 6.395, + "args": { + "External id": 297880,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368897307.181, "dur": 413.612, + "args": { + "External id": 297881,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368897331.576, "dur": 383.920, + "args": { + "External id": 297882,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4505, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368897342.365, "dur": 367.168, + "args": { + "External id": 297883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368897741.549, "dur": 2.494, + "args": { + "External id": 297884,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4507, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897803.991, "dur": 6.430, + "args": { + "External id": 297885,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897853.843, "dur": 1.705, + "args": { + "External id": 297886,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897870.655, "dur": 1.433, + "args": { + "External id": 297887,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897883.227, "dur": 1.159, + "args": { + "External id": 297888,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897894.629, "dur": 1.033, + "args": { + "External id": 297889,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897904.834, "dur": 1.336, + "args": { + "External id": 297890,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897916.227, "dur": 1.883, + "args": { + "External id": 297891,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897928.147, "dur": 1.115, + "args": { + "External id": 297892,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368897938.045, "dur": 1.101, + "args": { + "External id": 297893,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368898025.862, "dur": 2635.229, + "args": { + "External id": 297894,"Record function id": 0, "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2070554, "tid": 2107619, + "ts": 5333368898044.006, "dur": 976.335, + "args": { + "External id": 297895,"Record function id": 0, "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070554, "tid": 2107619, + "ts": 5333368898056.052, "dur": 310.419, + "args": { + "External id": 297896,"Record function id": 0, "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898129.853, "dur": 4.339, + "args": { + "External id": 297897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898137.261, "dur": 1.185, + "args": { + "External id": 297898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898139.917, "dur": 1.307, + "args": { + "External id": 297899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898142.579, "dur": 2.039, + "args": { + "External id": 297900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898145.999, "dur": 1.572, + "args": { + "External id": 297901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898148.840, "dur": 0.905, + "args": { + "External id": 297902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898151.252, "dur": 1.253, + "args": { + "External id": 297903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898153.824, "dur": 1.003, + "args": { + "External id": 297904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898156.439, "dur": 1.156, + "args": { + "External id": 297905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368898159.146, "dur": 1.452, + "args": { + "External id": 297906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368898195.475, "dur": 144.051, + "args": { + "External id": 297907,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368898211.574, "dur": 123.630, + "args": { + "External id": 297908,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368898224.420, "dur": 12.396, + "args": { + "External id": 297909,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368898240.299, "dur": 65.959, + "args": { + "External id": 297910,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368898242.994, "dur": 62.884, + "args": { + "External id": 297911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898246.701, "dur": 5.785, + "args": { + "External id": 297912,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368898254.025, "dur": 51.359, + "args": { + "External id": 297913,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2070554, "tid": 2107619, + "ts": 5333368898446.468, "dur": 566.857, + "args": { + "External id": 297914,"Record function id": 0, "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070554, "tid": 2107619, + "ts": 5333368898461.858, "dur": 539.043, + "args": { + "External id": 297915,"Record function id": 0, "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368898525.208, "dur": 4.781, + "args": { + "External id": 297916,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368898545.320, "dur": 23.316, + "args": { + "External id": 297917,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898549.530, "dur": 1.556, + "args": { + "External id": 297918,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898552.349, "dur": 0.839, + "args": { + "External id": 297919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898554.027, "dur": 0.507, + "args": { + "External id": 297920,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898555.237, "dur": 0.599, + "args": { + "External id": 297921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898557.395, "dur": 0.345, + "args": { + "External id": 297922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898559.584, "dur": 0.642, + "args": { + "External id": 297923,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898561.123, "dur": 0.595, + "args": { + "External id": 297924,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898563.183, "dur": 0.697, + "args": { + "External id": 297925,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898564.782, "dur": 0.605, + "args": { + "External id": 297926,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368898578.143, "dur": 30.366, + "args": { + "External id": 297927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2107619, + "ts": 5333368898677.711, "dur": 112.014, + "args": { + "External id": 297928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368898691.510, "dur": 4.658, + "args": { + "External id": 297929,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2107619, + "ts": 5333368898701.189, "dur": 11.719, + "args": { + "External id": 297930,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2107619, + "ts": 5333368898705.561, "dur": 6.926, + "args": { + "External id": 297931,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898709.079, "dur": 1.144, + "args": { + "External id": 297932,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2107619, + "ts": 5333368898719.884, "dur": 23.742, + "args": { + "External id": 297933,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898721.369, "dur": 0.947, + "args": { + "External id": 297934,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898723.601, "dur": 0.329, + "args": { + "External id": 297935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898725.061, "dur": 0.518, + "args": { + "External id": 297936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898726.503, "dur": 0.789, + "args": { + "External id": 297937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898728.451, "dur": 0.689, + "args": { + "External id": 297938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898730.447, "dur": 0.653, + "args": { + "External id": 297939,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898732.397, "dur": 0.665, + "args": { + "External id": 297940,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898734.368, "dur": 0.630, + "args": { + "External id": 297941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368898735.880, "dur": 0.466, + "args": { + "External id": 297942,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368898759.514, "dur": 22.649, + "args": { + "External id": 297943,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368898832.880, "dur": 108.313, + "args": { + "External id": 297944,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368898853.775, "dur": 84.132, + "args": { + "External id": 297945,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4568, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368898863.586, "dur": 70.753, + "args": { + "External id": 297946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368898953.368, "dur": 1.690, + "args": { + "External id": 297947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4570, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368899026.749, "dur": 1579.309, + "args": { + "External id": 297948,"Sequence number": 1209164, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4571 + } + }, + { + "ph": "f", "id": 68, "pid": 2070554, "tid": 2107619, "ts": 5333368899026.749, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899133.667, "dur": 121.850, + "args": { + "External id": 297949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368899294.031, "dur": 39.679, + "args": { + "External id": 297950,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899350.399, "dur": 53.717, + "args": { + "External id": 297951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899414.424, "dur": 32.080, + "args": { + "External id": 297952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899453.067, "dur": 45.465, + "args": { + "External id": 297953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899505.031, "dur": 27.496, + "args": { + "External id": 297954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899539.359, "dur": 41.468, + "args": { + "External id": 297955,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368899602.799, "dur": 61.152, + "args": { + "External id": 297956,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368899684.836, "dur": 31.130, + "args": { + "External id": 297957,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368899736.600, "dur": 20.092, + "args": { + "External id": 297958,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368899769.381, "dur": 15.092, + "args": { + "External id": 297959,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899792.190, "dur": 35.529, + "args": { + "External id": 297960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368899830.976, "dur": 35.416, + "args": { + "External id": 297961,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368899892.084, "dur": 162.523, + "args": { + "External id": 297962,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368899964.959, "dur": 5.777, + "args": { + "External id": 297963,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368899972.549, "dur": 3.337, + "args": { + "External id": 297964,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368900085.227, "dur": 27.091, + "args": { + "External id": 297965,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368900122.564, "dur": 15.750, + "args": { + "External id": 297966,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368900148.557, "dur": 53.109, + "args": { + "External id": 297967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368900210.159, "dur": 39.623, + "args": { + "External id": 297968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368900257.255, "dur": 21.863, + "args": { + "External id": 297969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368900284.149, "dur": 33.757, + "args": { + "External id": 297970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368900324.185, "dur": 20.687, + "args": { + "External id": 297971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368900351.332, "dur": 32.080, + "args": { + "External id": 297972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368900399.743, "dur": 21.393, + "args": { + "External id": 297973,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368900437.734, "dur": 21.936, + "args": { + "External id": 297974,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368900483.824, "dur": 26.949, + "args": { + "External id": 297975,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368900529.378, "dur": 15.240, + "args": { + "External id": 297976,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368900557.260, "dur": 16.610, + "args": { + "External id": 297977,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900684.366, "dur": 15.334, + "args": { + "External id": 297978,"Record function id": 0, "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900687.654, "dur": 10.977, + "args": { + "External id": 297979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900691.682, "dur": 5.549, + "args": { + "External id": 297980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900692.845, "dur": 4.248, + "args": { + "External id": 297981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900703.457, "dur": 13.156, + "args": { + "External id": 297982,"Record function id": 0, "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900704.684, "dur": 11.422, + "args": { + "External id": 297983,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900705.340, "dur": 10.234, + "args": { + "External id": 297984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900714.500, "dur": 0.999, + "args": { + "External id": 297985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900720.009, "dur": 4.026, + "args": { + "External id": 297986,"Record function id": 0, "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900721.170, "dur": 2.416, + "args": { + "External id": 297987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900721.682, "dur": 1.465, + "args": { + "External id": 297988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900722.055, "dur": 0.994, + "args": { + "External id": 297989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900727.316, "dur": 3.941, + "args": { + "External id": 297990,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900728.448, "dur": 2.391, + "args": { + "External id": 297991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900729.130, "dur": 1.285, + "args": { + "External id": 297992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900729.572, "dur": 0.766, + "args": { + "External id": 297993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900734.370, "dur": 3.835, + "args": { + "External id": 297994,"Record function id": 0, "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900735.362, "dur": 2.375, + "args": { + "External id": 297995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900735.875, "dur": 1.428, + "args": { + "External id": 297996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900736.294, "dur": 0.894, + "args": { + "External id": 297997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900741.325, "dur": 3.478, + "args": { + "External id": 297998,"Record function id": 0, "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900742.406, "dur": 1.971, + "args": { + "External id": 297999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900742.916, "dur": 1.059, + "args": { + "External id": 298000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900743.216, "dur": 0.687, + "args": { + "External id": 298001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900747.946, "dur": 3.853, + "args": { + "External id": 298002,"Record function id": 0, "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900749.111, "dur": 2.247, + "args": { + "External id": 298003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900749.599, "dur": 1.335, + "args": { + "External id": 298004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900750.041, "dur": 0.826, + "args": { + "External id": 298005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900754.861, "dur": 3.527, + "args": { + "External id": 298006,"Record function id": 0, "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900755.905, "dur": 2.068, + "args": { + "External id": 298007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900756.374, "dur": 1.148, + "args": { + "External id": 298008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900756.698, "dur": 0.758, + "args": { + "External id": 298009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900761.407, "dur": 3.486, + "args": { + "External id": 298010,"Record function id": 0, "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368900762.330, "dur": 2.148, + "args": { + "External id": 298011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900762.872, "dur": 1.202, + "args": { + "External id": 298012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368900763.228, "dur": 0.773, + "args": { + "External id": 298013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368900768.699, "dur": 37314.173, + "args": { + "External id": 298014,"Record function id": 0, "Sequence number": 1209163, "Fwd thread id": 1, "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368900769.908, "dur": 37304.996, + "args": { + "External id": 298015,"Sequence number": 1209163, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4638 + } + }, + { + "ph": "f", "id": 69, "pid": 2070554, "tid": 2107619, "ts": 5333368900769.908, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2070554, "tid": 2107619, + "ts": 5333368900800.069, "dur": 37.283, + "args": { + "External id": 298016,"Record function id": 0, "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2070554, "tid": 2107619, + "ts": 5333368900844.880, "dur": 63.871, + "args": { + "External id": 298017,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2070554, "tid": 2107619, + "ts": 5333368900914.973, "dur": 37152.248, + "args": { + "External id": 298018,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368901002.246, "dur": 6.479, + "args": { + "External id": 298019,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368901017.903, "dur": 4.612, + "args": { + "External id": 298020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368901035.911, "dur": 36255.958, + "args": { + "External id": 298021,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368901048.674, "dur": 36235.204, + "args": { + "External id": 298022,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368901093.069, "dur": 13.228, + "args": { + "External id": 298023,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368901112.174, "dur": 36132.818, + "args": { + "External id": 298024,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368901114.540, "dur": 36129.804, + "args": { + "External id": 298025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368901118.319, "dur": 5.072, + "args": { + "External id": 298026,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368901125.011, "dur": 36116.120, + "args": { + "External id": 298027,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368937375.594, "dur": 9.144, + "args": { + "External id": 298028,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368937378.451, "dur": 5.919, + "args": { + "External id": 298029,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368937415.761, "dur": 373.413, + "args": { + "External id": 298030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368937438.361, "dur": 345.534, + "args": { + "External id": 298031,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4654, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368937448.912, "dur": 328.883, + "args": { + "External id": 298032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368937807.957, "dur": 2.218, + "args": { + "External id": 298033,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4656, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937870.284, "dur": 6.442, + "args": { + "External id": 298034,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937920.588, "dur": 1.347, + "args": { + "External id": 298035,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937936.302, "dur": 1.317, + "args": { + "External id": 298036,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937949.465, "dur": 1.081, + "args": { + "External id": 298037,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937960.934, "dur": 0.881, + "args": { + "External id": 298038,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937971.438, "dur": 1.104, + "args": { + "External id": 298039,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937982.832, "dur": 0.834, + "args": { + "External id": 298040,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368937994.391, "dur": 1.265, + "args": { + "External id": 298041,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368938005.537, "dur": 1.296, + "args": { + "External id": 298042,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368938096.805, "dur": 2024.130, + "args": { + "External id": 298043,"Record function id": 0, "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2070554, "tid": 2107619, + "ts": 5333368938116.894, "dur": 428.998, + "args": { + "External id": 298044,"Record function id": 0, "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070554, "tid": 2107619, + "ts": 5333368938130.632, "dur": 325.967, + "args": { + "External id": 298045,"Record function id": 0, "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938231.295, "dur": 4.712, + "args": { + "External id": 298046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938239.428, "dur": 1.723, + "args": { + "External id": 298047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938242.902, "dur": 1.358, + "args": { + "External id": 298048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938248.799, "dur": 1.331, + "args": { + "External id": 298049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938251.600, "dur": 1.173, + "args": { + "External id": 298050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938254.192, "dur": 1.123, + "args": { + "External id": 298051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938256.869, "dur": 1.453, + "args": { + "External id": 298052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938259.666, "dur": 1.586, + "args": { + "External id": 298053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938262.646, "dur": 1.093, + "args": { + "External id": 298054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368938265.428, "dur": 1.270, + "args": { + "External id": 298055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368938284.141, "dur": 143.060, + "args": { + "External id": 298056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368938299.557, "dur": 123.507, + "args": { + "External id": 298057,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368938311.508, "dur": 12.358, + "args": { + "External id": 298058,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368938327.359, "dur": 66.178, + "args": { + "External id": 298059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368938329.601, "dur": 63.549, + "args": { + "External id": 298060,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368938333.101, "dur": 5.066, + "args": { + "External id": 298061,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368938339.776, "dur": 52.890, + "args": { + "External id": 298062,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368938552.902, "dur": 1548.231, + "args": { + "External id": 298063,"Sequence number": 1209162, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4686 + } + }, + { + "ph": "f", "id": 70, "pid": 2070554, "tid": 2107619, "ts": 5333368938552.902, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368938699.770, "dur": 107.339, + "args": { + "External id": 298064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368938845.621, "dur": 42.491, + "args": { + "External id": 298065,"kernel_hash": "cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/xo/cxonx5mk7e7epy33myj3jjhyzcg75j44argg6g4lybldimdgtudf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070554, "tid": 2107619, + "ts": 5333368938902.943, "dur": 53.593, + "args": { + "External id": 298066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368938965.747, "dur": 30.927, + "args": { + "External id": 298067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939002.417, "dur": 44.327, + "args": { + "External id": 298068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939053.214, "dur": 26.640, + "args": { + "External id": 298069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939085.949, "dur": 40.926, + "args": { + "External id": 298070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368939148.100, "dur": 38.922, + "args": { + "External id": 298071,"kernel_hash": "czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zp/czpg7m5cvhtnsrbedscn3z34ms26dcj4yxfddqcc2lsxt5vfm3gr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368939208.155, "dur": 30.134, + "args": { + "External id": 298072,"kernel_hash": "c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47krgrbnxrqlxc37sur5dq2rv5rcz5jecidebny2djfdualz2i5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368939258.846, "dur": 19.568, + "args": { + "External id": 298073,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368939290.808, "dur": 16.175, + "args": { + "External id": 298074,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939315.064, "dur": 32.708, + "args": { + "External id": 298075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939351.027, "dur": 33.771, + "args": { + "External id": 298076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070554, "tid": 2107619, + "ts": 5333368939411.300, "dur": 163.306, + "args": { + "External id": 298077,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368939484.573, "dur": 6.056, + "args": { + "External id": 298078,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368939492.558, "dur": 2.701, + "args": { + "External id": 298079,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368939605.037, "dur": 62.412, + "args": { + "External id": 298080,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368939680.739, "dur": 17.429, + "args": { + "External id": 298081,"kernel_hash": "caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/aa/caaxfmlp366sipjsiqrbwa7bn4zulwwbbk2av7b7vvjxoyepdkkh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939707.549, "dur": 40.640, + "args": { + "External id": 298082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939754.635, "dur": 36.718, + "args": { + "External id": 298083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939798.076, "dur": 21.705, + "args": { + "External id": 298084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939825.396, "dur": 31.058, + "args": { + "External id": 298085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939862.427, "dur": 20.218, + "args": { + "External id": 298086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2107619, + "ts": 5333368939889.341, "dur": 29.128, + "args": { + "External id": 298087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070554, "tid": 2107619, + "ts": 5333368939933.747, "dur": 19.299, + "args": { + "External id": 298088,"kernel_hash": "cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldidzj4zx5p32cpjzdu4iw32vq5mroxtnzzjnkygl7i3gxyrjx4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368939968.793, "dur": 21.084, + "args": { + "External id": 298089,"kernel_hash": "cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/bd/cbd2aaszixjoxe5zl2hakyfbnpdttdl6psxd5acx4ptx7o6rknwz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368940004.373, "dur": 15.351, + "args": { + "External id": 298090,"kernel_hash": "cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmooewemsuhwg5it5lnepehjxxoc2bqz5dtdtexrhyn7fn4f6l2.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070554, "tid": 2107619, + "ts": 5333368940034.188, "dur": 13.324, + "args": { + "External id": 298091,"kernel_hash": "ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/cj/ccjfnrp6f5yimftxut2uazyiv6kcmaxyqhpulc32bp3yvqhit56o.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070554, "tid": 2107619, + "ts": 5333368940059.087, "dur": 14.799, + "args": { + "External id": 298092,"kernel_hash": "c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5a/c5a5x6ehhfbgwbprspks6opettr56mogdbcmfzd5gpopd6oqjars.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940142.619, "dur": 14.711, + "args": { + "External id": 298093,"Record function id": 0, "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940146.038, "dur": 10.213, + "args": { + "External id": 298094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940149.661, "dur": 5.832, + "args": { + "External id": 298095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940151.027, "dur": 4.334, + "args": { + "External id": 298096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940160.954, "dur": 20.438, + "args": { + "External id": 298097,"Record function id": 0, "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940162.214, "dur": 17.692, + "args": { + "External id": 298098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940162.797, "dur": 1.846, + "args": { + "External id": 298099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940163.149, "dur": 1.386, + "args": { + "External id": 298100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940187.079, "dur": 6.379, + "args": { + "External id": 298101,"Record function id": 0, "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940189.012, "dur": 3.964, + "args": { + "External id": 298102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940189.996, "dur": 2.304, + "args": { + "External id": 298103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940190.826, "dur": 1.379, + "args": { + "External id": 298104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940196.691, "dur": 4.400, + "args": { + "External id": 298105,"Record function id": 0, "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940198.064, "dur": 2.583, + "args": { + "External id": 298106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940198.811, "dur": 1.419, + "args": { + "External id": 298107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940199.335, "dur": 0.815, + "args": { + "External id": 298108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940204.251, "dur": 3.869, + "args": { + "External id": 298109,"Record function id": 0, "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940205.600, "dur": 2.080, + "args": { + "External id": 298110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940206.116, "dur": 1.121, + "args": { + "External id": 298111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940206.411, "dur": 0.760, + "args": { + "External id": 298112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940211.274, "dur": 3.989, + "args": { + "External id": 298113,"Record function id": 0, "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940212.538, "dur": 2.280, + "args": { + "External id": 298114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940213.029, "dur": 1.348, + "args": { + "External id": 298115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940213.639, "dur": 0.665, + "args": { + "External id": 298116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940218.464, "dur": 3.650, + "args": { + "External id": 298117,"Record function id": 0, "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940219.556, "dur": 2.123, + "args": { + "External id": 298118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940220.059, "dur": 1.121, + "args": { + "External id": 298119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940220.372, "dur": 0.735, + "args": { + "External id": 298120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940225.329, "dur": 4.078, + "args": { + "External id": 298121,"Record function id": 0, "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940226.692, "dur": 2.237, + "args": { + "External id": 298122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940227.203, "dur": 1.291, + "args": { + "External id": 298123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940227.676, "dur": 0.755, + "args": { + "External id": 298124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940232.952, "dur": 3.746, + "args": { + "External id": 298125,"Record function id": 0, "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368940234.016, "dur": 2.248, + "args": { + "External id": 298126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940234.487, "dur": 1.357, + "args": { + "External id": 298127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368940234.791, "dur": 0.990, + "args": { + "External id": 298128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368940240.472, "dur": 37202.386, + "args": { + "External id": 298129,"Record function id": 0, "Sequence number": 1209161, "Fwd thread id": 1, "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368940241.609, "dur": 37193.370, + "args": { + "External id": 298130,"Sequence number": 1209161, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4753 + } + }, + { + "ph": "f", "id": 71, "pid": 2070554, "tid": 2107619, "ts": 5333368940241.609, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2070554, "tid": 2107619, + "ts": 5333368940273.738, "dur": 38.565, + "args": { + "External id": 298131,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2070554, "tid": 2107619, + "ts": 5333368940319.195, "dur": 66.049, + "args": { + "External id": 298132,"Record function id": 0, "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2070554, "tid": 2107619, + "ts": 5333368940390.975, "dur": 37036.437, + "args": { + "External id": 298133,"Record function id": 0, "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368940474.599, "dur": 6.624, + "args": { + "External id": 298134,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368940490.602, "dur": 4.673, + "args": { + "External id": 298135,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368940513.259, "dur": 36222.607, + "args": { + "External id": 298136,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368940525.903, "dur": 36201.724, + "args": { + "External id": 298137,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368940582.526, "dur": 14.206, + "args": { + "External id": 298138,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368940602.475, "dur": 36092.028, + "args": { + "External id": 298139,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368940604.699, "dur": 36089.192, + "args": { + "External id": 298140,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368940608.591, "dur": 6.030, + "args": { + "External id": 298141,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368940616.255, "dur": 36075.157, + "args": { + "External id": 298142,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368976816.901, "dur": 8.416, + "args": { + "External id": 298143,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368976819.361, "dur": 5.669, + "args": { + "External id": 298144,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368976852.782, "dur": 285.961, + "args": { + "External id": 298145,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368976877.423, "dur": 257.276, + "args": { + "External id": 298146,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4769, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368976887.948, "dur": 241.766, + "args": { + "External id": 298147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368977154.550, "dur": 2.079, + "args": { + "External id": 298148,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4771, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977227.455, "dur": 6.658, + "args": { + "External id": 298149,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977277.394, "dur": 1.224, + "args": { + "External id": 298150,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977294.477, "dur": 1.370, + "args": { + "External id": 298151,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977307.809, "dur": 1.092, + "args": { + "External id": 298152,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977319.951, "dur": 1.113, + "args": { + "External id": 298153,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977331.351, "dur": 1.540, + "args": { + "External id": 298154,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977343.276, "dur": 0.997, + "args": { + "External id": 298155,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977354.786, "dur": 1.629, + "args": { + "External id": 298156,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368977366.156, "dur": 1.303, + "args": { + "External id": 298157,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368977457.939, "dur": 310.132, + "args": { + "External id": 298158,"Record function id": 0, "Sequence number": 1209160, "Fwd thread id": 1, "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070554, "tid": 2107619, + "ts": 5333368977460.489, "dur": 298.435, + "args": { + "External id": 298159,"Sequence number": 1209160, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4782 + } + }, + { + "ph": "f", "id": 72, "pid": 2070554, "tid": 2107619, "ts": 5333368977460.489, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2070554, "tid": 2107619, + "ts": 5333368977572.583, "dur": 44.756, + "args": { + "External id": 298160,"kernel_hash": "cgh7npvq6fhcxx3reazrdhuyliq3n5n76s36t3lcymwjpvltgnle", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh7npvq6fhcxx3reazrdhuyliq3n5n76s36t3lcymwjpvltgnle.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2070554, "tid": 2107619, + "ts": 5333368977668.587, "dur": 31.934, + "args": { + "External id": 298161,"kernel_hash": "cnysn4mnhgerjcric5y2lu3awivs6z2tsvqalsf43c23fcqe7gd4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnysn4mnhgerjcric5y2lu3awivs6z2tsvqalsf43c23fcqe7gd4.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096], [16, 4096, 2048], [32000, 2048], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2070554, "tid": 2107619, + "ts": 5333368977719.842, "dur": 22.415, + "args": { + "External id": 298162,"kernel_hash": "catnay4z3ivljnzu3qgi3oklbggqihu5hajduwtrkgcudhz6vnwm", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/at/catnay4z3ivljnzu3qgi3oklbggqihu5hajduwtrkgcudhz6vnwm.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368977779.827, "dur": 14.080, + "args": { + "External id": 298163,"Record function id": 0, "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070554, "tid": 2107619, + "ts": 5333368977783.471, "dur": 9.540, + "args": { + "External id": 298164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368977786.993, "dur": 5.202, + "args": { + "External id": 298165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2107619, + "ts": 5333368977788.341, "dur": 3.763, + "args": { + "External id": 298166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2070554, "tid": 2107619, + "ts": 5333368977815.698, "dur": 5871.758, + "args": { + "External id": 298167,"Record function id": 0, "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2070554, "tid": 2107619, + "ts": 5333368977833.057, "dur": 21.513, + "args": { + "External id": 298168,"Record function id": 0, "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2070554, "tid": 2107619, + "ts": 5333368977860.255, "dur": 57.268, + "args": { + "External id": 298169,"Record function id": 0, "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2070554, "tid": 2107619, + "ts": 5333368977922.386, "dur": 5426.477, + "args": { + "External id": 298170,"Record function id": 0, "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368977990.578, "dur": 6.108, + "args": { + "External id": 298171,"Record function id": 0, "Concrete Inputs": ["[196610048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2107619, + "ts": 5333368978005.370, "dur": 4.333, + "args": { + "External id": 298172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368978022.780, "dur": 4623.623, + "args": { + "External id": 298173,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070554, "tid": 2107619, + "ts": 5333368978034.004, "dur": 4576.311, + "args": { + "External id": 298174,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368978204.031, "dur": 15.237, + "args": { + "External id": 298175,"Record function id": 0, "Concrete Inputs": ["[48027]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2107619, + "ts": 5333368978269.680, "dur": 4301.575, + "args": { + "External id": 298176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], [], []], "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2107619, + "ts": 5333368978272.247, "dur": 4298.103, + "args": { + "External id": 298177,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], []], "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368978276.335, "dur": 9.684, + "args": { + "External id": 298178,"Record function id": 0, "Concrete Inputs": ["[48027]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2107619, + "ts": 5333368978287.653, "dur": 4278.773, + "args": { + "External id": 298179,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[48027], [48027], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368982733.919, "dur": 9.535, + "args": { + "External id": 298180,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[196610048], [], [], [], [], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2107619, + "ts": 5333368982736.721, "dur": 6.428, + "args": { + "External id": 298181,"Record function id": 0, "Concrete Inputs": ["[24576256]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070554, "tid": 2107619, + "ts": 5333368982769.539, "dur": 356.255, + "args": { + "External id": 298182,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[24576256], [196610048], [], [], [], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368982792.008, "dur": 329.841, + "args": { + "External id": 298183,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 24576256, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[196610048], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4806, "In msg nelems": 196610048 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070554, "tid": 2107619, + "ts": 5333368982802.981, "dur": 313.955, + "args": { + "External id": 298184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[196610048]], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2107619, + "ts": 5333368983140.756, "dur": 2.092, + "args": { + "External id": 298185,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4808, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368983215.488, "dur": 6.513, + "args": { + "External id": 298186,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368983262.357, "dur": 1.184, + "args": { + "External id": 298187,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368983277.402, "dur": 2.136, + "args": { + "External id": 298188,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "8192256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2107619, + "ts": 5333368983290.212, "dur": 1.387, + "args": { + "External id": 298189,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "16384256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#14847", "pid": 2070554, "tid": 2070554, + "ts": 5333366952495.969, "dur": 2048457.837, + "args": { + "External id": 289281,"Record function id": 0, "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2070554, "tid": 2070554, + "ts": 5333366952529.109, "dur": 528.768, + "args": { + "External id": 289282,"Record function id": 0, "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2070554, "tid": 2070554, + "ts": 5333366953099.087, "dur": 2213.782, + "args": { + "External id": 289283,"Record function id": 0, "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366954096.084, "dur": 8.303, + "args": { + "External id": 289284,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070554, "tid": 2070554, + "ts": 5333366954125.002, "dur": 7.166, + "args": { + "External id": 289285,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366954546.602, "dur": 2.350, + "args": { + "External id": 289286,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070554, "tid": 2070554, + "ts": 5333366954559.431, "dur": 2.481, + "args": { + "External id": 289287,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366955208.820, "dur": 2.384, + "args": { + "External id": 289288,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070554, "tid": 2070554, + "ts": 5333366955216.230, "dur": 2.214, + "args": { + "External id": 289289,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333366956071.967, "dur": 15.568, + "args": { + "External id": 289290,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366956081.431, "dur": 2.258, + "args": { + "External id": 289291,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333366956088.972, "dur": 4.270, + "args": { + "External id": 289292,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366956090.877, "dur": 1.353, + "args": { + "External id": 289293,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366956117.163, "dur": 556.888, + "args": { + "External id": 289294,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366956124.464, "dur": 548.541, + "args": { + "External id": 289295,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366956134.715, "dur": 9.627, + "args": { + "External id": 289296,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366956146.622, "dur": 524.483, + "args": { + "External id": 289297,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366956156.279, "dur": 0.508, + "args": { + "External id": 289298,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070554, "tid": 2070554, + "ts": 5333366956159.122, "dur": 5.369, + "args": { + "External id": 289299,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[16, 4096], [16, 4096]], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070554, "tid": 2070554, + "ts": 5333366956161.156, "dur": 3.169, + "args": { + "External id": 289300,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[16, 4096], [], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366956163.505, "dur": 0.551, + "args": { + "External id": 289301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070554, "tid": 2070554, + "ts": 5333366956181.204, "dur": 217.876, + "args": { + "External id": 289302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070554, "tid": 2070554, + "ts": 5333366956184.322, "dur": 214.426, + "args": { + "External id": 289303,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333366956186.586, "dur": 41.868, + "args": { + "External id": 289304,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366956191.387, "dur": 36.292, + "args": { + "External id": 289305,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366956229.228, "dur": 169.063, + "args": { + "External id": 289306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366956401.270, "dur": 265.400, + "args": { + "External id": 289307,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366956694.871, "dur": 431.843, + "args": { + "External id": 289308,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366956696.779, "dur": 428.979, + "args": { + "External id": 289309,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366956705.144, "dur": 9.387, + "args": { + "External id": 289310,"Record function id": 0, "Concrete Inputs": ["[16, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366956718.240, "dur": 405.226, + "args": { + "External id": 289311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[16, 8192], [16, 8192], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070554, "tid": 2070554, + "ts": 5333366957151.549, "dur": 76.759, + "args": { + "External id": 289312,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366957156.333, "dur": 4.722, + "args": { + "External id": 289313,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070554, "tid": 2070554, + "ts": 5333366957162.979, "dur": 64.733, + "args": { + "External id": 289314,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333366957188.683, "dur": 7.985, + "args": { + "External id": 289315,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2070554, "tid": 2070554, + "ts": 5333366957241.071, "dur": 77.850, + "args": { + "External id": 289316,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070554, "tid": 2070554, + "ts": 5333366957247.998, "dur": 8.130, + "args": { + "External id": 289317,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957253.313, "dur": 2.532, + "args": { + "External id": 289318,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 4850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366957257.271, "dur": 4.010, + "args": { + "External id": 289319,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2070554, "tid": 2070554, + "ts": 5333366957263.730, "dur": 3.718, + "args": { + "External id": 289320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[16, 4096]], "Ev Idx": 4852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070554, "tid": 2070554, + "ts": 5333366957270.326, "dur": 5.819, + "args": { + "External id": 289321,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957275.265, "dur": 0.680, + "args": { + "External id": 289322,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070554, "tid": 2070554, + "ts": 5333366957277.064, "dur": 4.313, + "args": { + "External id": 289323,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957280.368, "dur": 0.911, + "args": { + "External id": 289324,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070554, "tid": 2070554, + "ts": 5333366957282.940, "dur": 6.411, + "args": { + "External id": 289325,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [16, 1, 1, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070554, "tid": 2070554, + "ts": 5333366957286.658, "dur": 2.415, + "args": { + "External id": 289326,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957288.079, "dur": 0.908, + "args": { + "External id": 289327,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366957290.312, "dur": 27.785, + "args": { + "External id": 289328,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[16, 1, 1, 4096], [16, 1, 1, 4096], []], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366957327.419, "dur": 29.448, + "args": { + "External id": 289329,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366957328.570, "dur": 28.036, + "args": { + "External id": 289330,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957333.495, "dur": 3.193, + "args": { + "External id": 289331,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366957337.284, "dur": 18.933, + "args": { + "External id": 289332,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333366957467.415, "dur": 149.961, + "args": { + "External id": 289333,"Record function id": 0, "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2070554, "tid": 2070554, + "ts": 5333366957549.851, "dur": 57.117, + "args": { + "External id": 289334,"Record function id": 0, "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333366957666.861, "dur": 44.783, + "args": { + "External id": 289335,"Record function id": 0, "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333366957720.640, "dur": 7698.629, + "args": { + "External id": 289336,"Record function id": 0, "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2070554, "tid": 2070554, + "ts": 5333366957729.542, "dur": 779.716, + "args": { + "External id": 289337,"Record function id": 0, "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366957796.552, "dur": 9.194, + "args": { + "External id": 289338,"Record function id": 0, "Concrete Inputs": ["[24576256]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366957819.476, "dur": 18.710, + "args": { + "External id": 289339,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957823.752, "dur": 1.702, + "args": { + "External id": 289340,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957831.710, "dur": 0.301, + "args": { + "External id": 289341,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957832.867, "dur": 0.388, + "args": { + "External id": 289342,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "8192256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957833.741, "dur": 0.694, + "args": { + "External id": 289343,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "16384256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366957851.548, "dur": 50.363, + "args": { + "External id": 289344,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333366957932.980, "dur": 106.806, + "args": { + "External id": 289345,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "24576256", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [], [], [], [], [], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366957946.527, "dur": 3.909, + "args": { + "External id": 289346,"Record function id": 0, "Concrete Inputs": ["[196610048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333366957956.256, "dur": 12.397, + "args": { + "External id": 289347,"Record function id": 0, "Concrete Inputs": ["", "0", "172033792", "24576256"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333366957960.909, "dur": 7.351, + "args": { + "External id": 289348,"Record function id": 0, "Concrete Inputs": ["", "0", "172033792", "196610048", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[196610048], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957966.326, "dur": 0.493, + "args": { + "External id": 289349,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "[1]", "172033792"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366957975.059, "dur": 12.399, + "args": { + "External id": 289350,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957976.418, "dur": 0.398, + "args": { + "External id": 289351,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "172033792"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957982.218, "dur": 0.194, + "args": { + "External id": 289352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "180225792"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957982.963, "dur": 0.198, + "args": { + "External id": 289353,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "180226048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366957983.833, "dur": 0.159, + "args": { + "External id": 289354,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "188418048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366957998.715, "dur": 32.802, + "args": { + "External id": 289355,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333366958086.949, "dur": 325.836, + "args": { + "External id": 289356,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[196610048], [24576256], [], [], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366958119.377, "dur": 288.341, + "args": { + "External id": 289357,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 196610048, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[24576256], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4889, "In msg nelems": 24576256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333366958131.321, "dur": 270.024, + "args": { + "External id": 289358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[24576256]], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366958440.309, "dur": 2.492, + "args": { + "External id": 289359,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4891, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2070554, "tid": 2070554, + "ts": 5333366958524.170, "dur": 6793.619, + "args": { + "External id": 289360,"Record function id": 0, "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366958600.832, "dur": 5.661, + "args": { + "External id": 289361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366958609.693, "dur": 1.221, + "args": { + "External id": 289362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366958612.608, "dur": 2.152, + "args": { + "External id": 289363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366958616.561, "dur": 0.799, + "args": { + "External id": 289364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366958619.005, "dur": 0.706, + "args": { + "External id": 289365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366958677.918, "dur": 6594.909, + "args": { + "External id": 289366,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366958696.882, "dur": 6567.931, + "args": { + "External id": 289367,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366958717.237, "dur": 5.670, + "args": { + "External id": 289368,"Record function id": 0, "Concrete Inputs": ["[3447]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366958729.397, "dur": 6499.925, + "args": { + "External id": 289369,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], [], []], "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366958731.899, "dur": 6496.690, + "args": { + "External id": 289370,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], []], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366958737.811, "dur": 5.353, + "args": { + "External id": 289371,"Record function id": 0, "Concrete Inputs": ["[3447]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366958744.667, "dur": 6480.104, + "args": { + "External id": 289372,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3447], [3447], []], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333366965499.951, "dur": 35.672, + "args": { + "External id": 289373,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2070554, "tid": 2070554, + "ts": 5333366965536.602, "dur": 249.964, + "args": { + "External id": 289374,"Record function id": 0, "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333366965581.080, "dur": 196.580, + "args": { + "External id": 289375,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[2048, 1], [4096, 1]], "Input Dims": [[32000, 2048], [16, 4096]], "Ev Idx": 4907 + } + }, + { + "ph": "s", "id": 72, "pid": 2070554, "tid": 2070554, "ts": 5333366965581.080, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2070554, "tid": 2070554, + "ts": 5333366965685.801, "dur": 54.972, + "args": { + "External id": 289376,"kernel_hash": "cfktqnhujkvemjetr4frr6i6zlhasbqswa6l3ebbfx7l4gnf2ioq", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/fk/cfktqnhujkvemjetr4frr6i6zlhasbqswa6l3ebbfx7l4gnf2ioq.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096], [32000, 2048], [16, 4096, 2048], []], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333366965845.912, "dur": 54.033, + "args": { + "External id": 289377,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2070554, "tid": 2070554, + "ts": 5333366965910.497, "dur": 7170.051, + "args": { + "External id": 289378,"Record function id": 0, "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070554, "tid": 2070554, + "ts": 5333366965918.418, "dur": 852.837, + "args": { + "External id": 289379,"Record function id": 0, "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366965988.505, "dur": 9.740, + "args": { + "External id": 289380,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366966010.704, "dur": 43.401, + "args": { + "External id": 289381,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966021.618, "dur": 2.538, + "args": { + "External id": 289382,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966025.733, "dur": 2.008, + "args": { + "External id": 289383,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966028.479, "dur": 2.897, + "args": { + "External id": 289384,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966033.941, "dur": 0.200, + "args": { + "External id": 289385,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966034.669, "dur": 0.468, + "args": { + "External id": 289386,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966039.757, "dur": 0.360, + "args": { + "External id": 289387,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966041.091, "dur": 0.431, + "args": { + "External id": 289388,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966042.383, "dur": 0.481, + "args": { + "External id": 289389,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966048.130, "dur": 0.272, + "args": { + "External id": 289390,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366966064.734, "dur": 33.463, + "args": { + "External id": 289391,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333366966133.913, "dur": 140.600, + "args": { + "External id": 289392,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366966145.407, "dur": 5.976, + "args": { + "External id": 289393,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333366966156.547, "dur": 30.773, + "args": { + "External id": 289394,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333366966161.061, "dur": 25.768, + "args": { + "External id": 289395,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966183.537, "dur": 1.057, + "args": { + "External id": 289396,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366966196.597, "dur": 29.385, + "args": { + "External id": 289397,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966198.334, "dur": 0.293, + "args": { + "External id": 289398,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966201.643, "dur": 0.348, + "args": { + "External id": 289399,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966205.637, "dur": 0.574, + "args": { + "External id": 289400,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966209.043, "dur": 0.180, + "args": { + "External id": 289401,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966210.045, "dur": 2.620, + "args": { + "External id": 289402,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966213.393, "dur": 0.387, + "args": { + "External id": 289403,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966216.330, "dur": 1.355, + "args": { + "External id": 289404,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966221.003, "dur": 0.321, + "args": { + "External id": 289405,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366966221.976, "dur": 0.181, + "args": { + "External id": 289406,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366966239.684, "dur": 25.270, + "args": { + "External id": 289407,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333366966325.522, "dur": 348.261, + "args": { + "External id": 289408,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366966354.416, "dur": 313.631, + "args": { + "External id": 289409,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4941, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333366966364.535, "dur": 296.314, + "args": { + "External id": 289410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366966699.437, "dur": 2.903, + "args": { + "External id": 289411,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4943, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070554, "tid": 2070554, + "ts": 5333366966793.084, "dur": 6087.958, + "args": { + "External id": 289412,"Record function id": 0, "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966887.655, "dur": 5.924, + "args": { + "External id": 289413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966896.603, "dur": 1.047, + "args": { + "External id": 289414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966899.037, "dur": 0.927, + "args": { + "External id": 289415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966901.705, "dur": 0.803, + "args": { + "External id": 289416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966903.999, "dur": 0.609, + "args": { + "External id": 289417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966905.747, "dur": 2.193, + "args": { + "External id": 289418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966909.315, "dur": 1.127, + "args": { + "External id": 289419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966914.181, "dur": 2.187, + "args": { + "External id": 289420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966917.898, "dur": 0.699, + "args": { + "External id": 289421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366966919.732, "dur": 0.867, + "args": { + "External id": 289422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366966938.268, "dur": 5898.027, + "args": { + "External id": 289423,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366966955.009, "dur": 5873.552, + "args": { + "External id": 289424,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366966978.149, "dur": 13.875, + "args": { + "External id": 289425,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366966995.439, "dur": 5797.461, + "args": { + "External id": 289426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366966998.166, "dur": 5794.093, + "args": { + "External id": 289427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366967004.014, "dur": 4.862, + "args": { + "External id": 289428,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366967010.328, "dur": 5778.560, + "args": { + "External id": 289429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333366973024.289, "dur": 31.305, + "args": { + "External id": 289430,"Sequence number": 1209161, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4962 + } + }, + { + "ph": "s", "id": 71, "pid": 2070554, "tid": 2070554, "ts": 5333366973024.289, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333366973040.237, "dur": 10.852, + "args": { + "External id": 289431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366973045.854, "dur": 4.999, + "args": { + "External id": 289432,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333366973123.488, "dur": 124.645, + "args": { + "External id": 289433,"Record function id": 0, "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333366973251.331, "dur": 1182.425, + "args": { + "External id": 289434,"Record function id": 0, "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333366973294.162, "dur": 1125.556, + "args": { + "External id": 289435,"Sequence number": 1209162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 4967 + } + }, + { + "ph": "s", "id": 70, "pid": 2070554, "tid": 2070554, "ts": 5333366973294.162, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333366973364.364, "dur": 49.933, + "args": { + "External id": 289436,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366973427.456, "dur": 108.236, + "args": { + "External id": 289437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366973544.330, "dur": 39.369, + "args": { + "External id": 289438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366973593.816, "dur": 96.292, + "args": { + "External id": 289439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333366973722.622, "dur": 31.322, + "args": { + "External id": 289440,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333366973774.105, "dur": 16.047, + "args": { + "External id": 289441,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333366973809.945, "dur": 134.036, + "args": { + "External id": 289442,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333366973863.863, "dur": 11.852, + "args": { + "External id": 289443,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366973868.801, "dur": 6.125, + "args": { + "External id": 289444,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366973878.289, "dur": 3.641, + "args": { + "External id": 289445,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366973885.049, "dur": 1.204, + "args": { + "External id": 289446,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366973888.394, "dur": 3.330, + "args": { + "External id": 289447,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366973954.788, "dur": 51.979, + "args": { + "External id": 289448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333366974037.109, "dur": 31.342, + "args": { + "External id": 289449,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366974076.196, "dur": 43.279, + "args": { + "External id": 289450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366974128.039, "dur": 35.967, + "args": { + "External id": 289451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333366974215.396, "dur": 26.919, + "args": { + "External id": 289452,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366974248.680, "dur": 40.232, + "args": { + "External id": 289453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333366974316.380, "dur": 23.146, + "args": { + "External id": 289454,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2070554, "tid": 2070554, + "ts": 5333366974499.165, "dur": 77.499, + "args": { + "External id": 289455,"Record function id": 0, "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333366974711.737, "dur": 47.076, + "args": { + "External id": 289456,"Record function id": 0, "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2070554, "tid": 2070554, + "ts": 5333366974768.810, "dur": 19196.432, + "args": { + "External id": 289457,"Record function id": 0, "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070554, "tid": 2070554, + "ts": 5333366974777.994, "dur": 1009.846, + "args": { + "External id": 289458,"Record function id": 0, "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366974859.111, "dur": 9.536, + "args": { + "External id": 289459,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366974881.806, "dur": 45.868, + "args": { + "External id": 289460,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974889.433, "dur": 4.440, + "args": { + "External id": 289461,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974897.923, "dur": 0.396, + "args": { + "External id": 289462,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974901.297, "dur": 0.415, + "args": { + "External id": 289463,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974905.236, "dur": 0.326, + "args": { + "External id": 289464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974906.539, "dur": 0.180, + "args": { + "External id": 289465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974910.179, "dur": 2.765, + "args": { + "External id": 289466,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974913.964, "dur": 1.281, + "args": { + "External id": 289467,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974915.911, "dur": 0.339, + "args": { + "External id": 289468,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366974921.044, "dur": 0.179, + "args": { + "External id": 289469,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366974938.983, "dur": 46.615, + "args": { + "External id": 289470,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333366975022.960, "dur": 123.090, + "args": { + "External id": 289471,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366975037.143, "dur": 4.190, + "args": { + "External id": 289472,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333366975046.515, "dur": 9.932, + "args": { + "External id": 289473,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333366975051.038, "dur": 5.011, + "args": { + "External id": 289474,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975054.275, "dur": 0.458, + "args": { + "External id": 289475,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366975063.381, "dur": 31.003, + "args": { + "External id": 289476,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975066.843, "dur": 0.488, + "args": { + "External id": 289477,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975068.400, "dur": 0.374, + "args": { + "External id": 289478,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975071.810, "dur": 0.501, + "args": { + "External id": 289479,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975074.784, "dur": 1.444, + "args": { + "External id": 289480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975076.954, "dur": 0.156, + "args": { + "External id": 289481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975080.359, "dur": 0.514, + "args": { + "External id": 289482,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975083.776, "dur": 0.171, + "args": { + "External id": 289483,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975084.758, "dur": 2.467, + "args": { + "External id": 289484,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366975089.869, "dur": 0.178, + "args": { + "External id": 289485,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366975107.909, "dur": 28.360, + "args": { + "External id": 289486,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333366975231.277, "dur": 448.933, + "args": { + "External id": 289487,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366975282.161, "dur": 333.726, + "args": { + "External id": 289488,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5020, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333366975293.322, "dur": 315.865, + "args": { + "External id": 289489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366975708.525, "dur": 3.620, + "args": { + "External id": 289490,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5022, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070554, "tid": 2070554, + "ts": 5333366975810.642, "dur": 17955.981, + "args": { + "External id": 289491,"Record function id": 0, "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975910.932, "dur": 6.995, + "args": { + "External id": 289492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975921.298, "dur": 1.021, + "args": { + "External id": 289493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975923.934, "dur": 1.859, + "args": { + "External id": 289494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975927.305, "dur": 0.832, + "args": { + "External id": 289495,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975929.460, "dur": 0.655, + "args": { + "External id": 289496,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975933.349, "dur": 0.589, + "args": { + "External id": 289497,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975935.445, "dur": 0.844, + "args": { + "External id": 289498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975937.997, "dur": 1.655, + "args": { + "External id": 289499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975940.981, "dur": 0.642, + "args": { + "External id": 289500,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366975945.488, "dur": 0.966, + "args": { + "External id": 289501,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366975965.903, "dur": 17755.705, + "args": { + "External id": 289502,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366975982.363, "dur": 17731.079, + "args": { + "External id": 289503,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366976002.978, "dur": 16.743, + "args": { + "External id": 289504,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366976024.016, "dur": 17649.386, + "args": { + "External id": 289505,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366976026.376, "dur": 17646.295, + "args": { + "External id": 289506,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366976033.090, "dur": 5.489, + "args": { + "External id": 289507,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366976040.364, "dur": 17629.201, + "args": { + "External id": 289508,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333366993908.065, "dur": 32.057, + "args": { + "External id": 289509,"Sequence number": 1209163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5041 + } + }, + { + "ph": "s", "id": 69, "pid": 2070554, "tid": 2070554, "ts": 5333366993908.065, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333366993926.263, "dur": 9.253, + "args": { + "External id": 289510,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366993930.090, "dur": 5.196, + "args": { + "External id": 289511,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333366994007.128, "dur": 94.914, + "args": { + "External id": 289512,"Record function id": 0, "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333366994103.392, "dur": 1131.130, + "args": { + "External id": 289513,"Record function id": 0, "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333366994142.923, "dur": 1076.787, + "args": { + "External id": 289514,"Sequence number": 1209164, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5046 + } + }, + { + "ph": "s", "id": 68, "pid": 2070554, "tid": 2070554, "ts": 5333366994142.923, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333366994232.893, "dur": 47.295, + "args": { + "External id": 289515,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366994293.633, "dur": 107.054, + "args": { + "External id": 289516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366994411.418, "dur": 38.325, + "args": { + "External id": 289517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366994458.364, "dur": 31.570, + "args": { + "External id": 289518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333366994514.281, "dur": 29.246, + "args": { + "External id": 289519,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333366994561.467, "dur": 15.532, + "args": { + "External id": 289520,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333366994596.118, "dur": 175.468, + "args": { + "External id": 289521,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333366994688.878, "dur": 15.567, + "args": { + "External id": 289522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366994694.409, "dur": 9.082, + "args": { + "External id": 289523,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366994706.926, "dur": 3.656, + "args": { + "External id": 289524,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366994711.716, "dur": 0.972, + "args": { + "External id": 289525,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366994715.058, "dur": 2.862, + "args": { + "External id": 289526,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366994783.259, "dur": 51.844, + "args": { + "External id": 289527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333366994867.373, "dur": 31.348, + "args": { + "External id": 289528,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366994906.292, "dur": 43.275, + "args": { + "External id": 289529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366994959.072, "dur": 35.162, + "args": { + "External id": 289530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333366995017.753, "dur": 27.470, + "args": { + "External id": 289531,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333366995051.075, "dur": 35.501, + "args": { + "External id": 289532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333366995109.789, "dur": 19.571, + "args": { + "External id": 289533,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2070554, "tid": 2070554, + "ts": 5333366995301.094, "dur": 78.091, + "args": { + "External id": 289534,"Record function id": 0, "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333366995451.388, "dur": 45.124, + "args": { + "External id": 289535,"Record function id": 0, "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2070554, "tid": 2070554, + "ts": 5333366995505.257, "dur": 18643.174, + "args": { + "External id": 289536,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070554, "tid": 2070554, + "ts": 5333366995515.440, "dur": 866.899, + "args": { + "External id": 289537,"Record function id": 0, "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366995595.119, "dur": 9.043, + "args": { + "External id": 289538,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366995617.537, "dur": 78.389, + "args": { + "External id": 289539,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995661.939, "dur": 2.335, + "args": { + "External id": 289540,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995671.306, "dur": 0.455, + "args": { + "External id": 289541,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995672.583, "dur": 0.631, + "args": { + "External id": 289542,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995674.264, "dur": 0.603, + "args": { + "External id": 289543,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995679.531, "dur": 1.706, + "args": { + "External id": 289544,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995682.296, "dur": 0.194, + "args": { + "External id": 289545,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995684.187, "dur": 2.760, + "args": { + "External id": 289546,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995687.987, "dur": 0.523, + "args": { + "External id": 289547,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995689.125, "dur": 0.179, + "args": { + "External id": 289548,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366995710.043, "dur": 48.993, + "args": { + "External id": 289549,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333366995795.250, "dur": 119.479, + "args": { + "External id": 289550,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366995809.989, "dur": 5.152, + "args": { + "External id": 289551,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333366995820.384, "dur": 9.672, + "args": { + "External id": 289552,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333366995825.035, "dur": 4.635, + "args": { + "External id": 289553,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995828.020, "dur": 0.372, + "args": { + "External id": 289554,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333366995838.436, "dur": 31.163, + "args": { + "External id": 289555,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995840.068, "dur": 2.558, + "args": { + "External id": 289556,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995843.626, "dur": 1.502, + "args": { + "External id": 289557,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995847.981, "dur": 0.297, + "args": { + "External id": 289558,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995851.706, "dur": 0.181, + "args": { + "External id": 289559,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995853.833, "dur": 0.538, + "args": { + "External id": 289560,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995855.240, "dur": 0.348, + "args": { + "External id": 289561,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995859.749, "dur": 0.148, + "args": { + "External id": 289562,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995862.289, "dur": 0.419, + "args": { + "External id": 289563,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366995863.374, "dur": 2.403, + "args": { + "External id": 289564,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366995880.715, "dur": 24.677, + "args": { + "External id": 289565,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333366995968.999, "dur": 318.786, + "args": { + "External id": 289566,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366996002.910, "dur": 279.627, + "args": { + "External id": 289567,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5099, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333366996016.439, "dur": 259.683, + "args": { + "External id": 289568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333366996313.174, "dur": 2.185, + "args": { + "External id": 289569,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5101, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070554, "tid": 2070554, + "ts": 5333366996403.728, "dur": 17552.087, + "args": { + "External id": 289570,"Record function id": 0, "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996496.439, "dur": 6.207, + "args": { + "External id": 289571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996506.069, "dur": 0.880, + "args": { + "External id": 289572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996508.571, "dur": 0.805, + "args": { + "External id": 289573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996511.070, "dur": 1.020, + "args": { + "External id": 289574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996513.414, "dur": 0.966, + "args": { + "External id": 289575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996515.534, "dur": 0.883, + "args": { + "External id": 289576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996520.433, "dur": 0.591, + "args": { + "External id": 289577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996522.424, "dur": 3.143, + "args": { + "External id": 289578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996526.722, "dur": 0.983, + "args": { + "External id": 289579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333366996529.109, "dur": 0.595, + "args": { + "External id": 289580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366996550.955, "dur": 17360.372, + "args": { + "External id": 289581,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366996565.786, "dur": 17337.220, + "args": { + "External id": 289582,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333366996585.381, "dur": 13.161, + "args": { + "External id": 289583,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333366996602.160, "dur": 17263.491, + "args": { + "External id": 289584,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333366996604.851, "dur": 17260.071, + "args": { + "External id": 289585,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333366996611.983, "dur": 5.109, + "args": { + "External id": 289586,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333366996656.913, "dur": 17204.783, + "args": { + "External id": 289587,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367014094.007, "dur": 30.237, + "args": { + "External id": 289588,"Sequence number": 1209165, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5120 + } + }, + { + "ph": "s", "id": 67, "pid": 2070554, "tid": 2070554, "ts": 5333367014094.007, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367014109.992, "dur": 9.584, + "args": { + "External id": 289589,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367014114.027, "dur": 5.304, + "args": { + "External id": 289590,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367014204.901, "dur": 94.875, + "args": { + "External id": 289591,"Record function id": 0, "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367014302.007, "dur": 1157.212, + "args": { + "External id": 289592,"Record function id": 0, "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367014346.159, "dur": 1099.731, + "args": { + "External id": 289593,"Sequence number": 1209166, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5125 + } + }, + { + "ph": "s", "id": 66, "pid": 2070554, "tid": 2070554, "ts": 5333367014346.159, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367014417.291, "dur": 48.991, + "args": { + "External id": 289594,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367014479.034, "dur": 108.586, + "args": { + "External id": 289595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367014595.497, "dur": 83.107, + "args": { + "External id": 289596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367014692.866, "dur": 36.805, + "args": { + "External id": 289597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367014756.163, "dur": 31.117, + "args": { + "External id": 289598,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367014807.155, "dur": 16.343, + "args": { + "External id": 289599,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367014842.549, "dur": 133.186, + "args": { + "External id": 289600,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367014893.656, "dur": 13.777, + "args": { + "External id": 289601,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367014900.109, "dur": 6.590, + "args": { + "External id": 289602,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367014909.926, "dur": 5.900, + "args": { + "External id": 289603,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367014917.092, "dur": 1.082, + "args": { + "External id": 289604,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367014920.417, "dur": 2.719, + "args": { + "External id": 289605,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367014986.587, "dur": 51.174, + "args": { + "External id": 289606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367015067.582, "dur": 30.645, + "args": { + "External id": 289607,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367015106.867, "dur": 42.174, + "args": { + "External id": 289608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367015157.738, "dur": 65.762, + "args": { + "External id": 289609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367015249.632, "dur": 27.471, + "args": { + "External id": 289610,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367015283.253, "dur": 37.752, + "args": { + "External id": 289611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367015344.870, "dur": 23.364, + "args": { + "External id": 289612,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2070554, "tid": 2070554, + "ts": 5333367015524.767, "dur": 78.702, + "args": { + "External id": 289613,"Record function id": 0, "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367015736.855, "dur": 52.324, + "args": { + "External id": 289614,"Record function id": 0, "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2070554, "tid": 2070554, + "ts": 5333367015799.733, "dur": 18234.584, + "args": { + "External id": 289615,"Record function id": 0, "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070554, "tid": 2070554, + "ts": 5333367015809.713, "dur": 897.967, + "args": { + "External id": 289616,"Record function id": 0, "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367015890.720, "dur": 10.037, + "args": { + "External id": 289617,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367015914.319, "dur": 42.362, + "args": { + "External id": 289618,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015919.572, "dur": 2.376, + "args": { + "External id": 289619,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015927.804, "dur": 0.408, + "args": { + "External id": 289620,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015929.165, "dur": 0.446, + "args": { + "External id": 289621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015931.645, "dur": 0.600, + "args": { + "External id": 289622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015935.663, "dur": 0.496, + "args": { + "External id": 289623,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015936.798, "dur": 0.905, + "args": { + "External id": 289624,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015940.292, "dur": 4.210, + "args": { + "External id": 289625,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015945.560, "dur": 0.273, + "args": { + "External id": 289626,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367015949.067, "dur": 0.147, + "args": { + "External id": 289627,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367015969.489, "dur": 47.269, + "args": { + "External id": 289628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367016052.164, "dur": 157.730, + "args": { + "External id": 289629,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367016064.724, "dur": 3.900, + "args": { + "External id": 289630,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367016073.503, "dur": 14.308, + "args": { + "External id": 289631,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367016078.105, "dur": 9.259, + "args": { + "External id": 289632,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016082.865, "dur": 2.930, + "args": { + "External id": 289633,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367016096.950, "dur": 29.895, + "args": { + "External id": 289634,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016098.750, "dur": 0.573, + "args": { + "External id": 289635,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016101.954, "dur": 2.913, + "args": { + "External id": 289636,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016105.900, "dur": 0.608, + "args": { + "External id": 289637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016107.169, "dur": 1.921, + "args": { + "External id": 289638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016113.647, "dur": 0.176, + "args": { + "External id": 289639,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016114.743, "dur": 0.363, + "args": { + "External id": 289640,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016115.979, "dur": 0.149, + "args": { + "External id": 289641,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016120.854, "dur": 0.162, + "args": { + "External id": 289642,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016121.906, "dur": 0.142, + "args": { + "External id": 289643,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367016140.505, "dur": 59.930, + "args": { + "External id": 289644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367016264.368, "dur": 305.651, + "args": { + "External id": 289645,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367016297.691, "dur": 267.759, + "args": { + "External id": 289646,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5178, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367016309.654, "dur": 248.585, + "args": { + "External id": 289647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367016591.695, "dur": 2.327, + "args": { + "External id": 289648,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5180, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070554, "tid": 2070554, + "ts": 5333367016733.006, "dur": 17104.588, + "args": { + "External id": 289649,"Record function id": 0, "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016827.526, "dur": 6.524, + "args": { + "External id": 289650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016837.525, "dur": 1.119, + "args": { + "External id": 289651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016840.403, "dur": 2.235, + "args": { + "External id": 289652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016844.384, "dur": 0.946, + "args": { + "External id": 289653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016846.815, "dur": 1.110, + "args": { + "External id": 289654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016849.452, "dur": 0.886, + "args": { + "External id": 289655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016854.046, "dur": 0.970, + "args": { + "External id": 289656,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016856.493, "dur": 2.451, + "args": { + "External id": 289657,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016860.184, "dur": 0.947, + "args": { + "External id": 289658,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367016863.608, "dur": 0.677, + "args": { + "External id": 289659,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367016884.488, "dur": 16908.796, + "args": { + "External id": 289660,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367016900.208, "dur": 16885.349, + "args": { + "External id": 289661,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367016921.107, "dur": 14.769, + "args": { + "External id": 289662,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367016940.212, "dur": 16809.200, + "args": { + "External id": 289663,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367016943.010, "dur": 16805.673, + "args": { + "External id": 289664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367016954.965, "dur": 5.871, + "args": { + "External id": 289665,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367016962.788, "dur": 16782.480, + "args": { + "External id": 289666,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367033973.846, "dur": 33.515, + "args": { + "External id": 289667,"Sequence number": 1209167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5199 + } + }, + { + "ph": "s", "id": 65, "pid": 2070554, "tid": 2070554, "ts": 5333367033973.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367033993.727, "dur": 8.914, + "args": { + "External id": 289668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367033997.590, "dur": 4.855, + "args": { + "External id": 289669,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367034073.598, "dur": 112.830, + "args": { + "External id": 289670,"Record function id": 0, "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367034189.569, "dur": 1144.076, + "args": { + "External id": 289671,"Record function id": 0, "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367034234.837, "dur": 1085.088, + "args": { + "External id": 289672,"Sequence number": 1209168, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5204 + } + }, + { + "ph": "s", "id": 64, "pid": 2070554, "tid": 2070554, "ts": 5333367034234.837, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367034303.176, "dur": 48.599, + "args": { + "External id": 289673,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367034364.826, "dur": 108.832, + "args": { + "External id": 289674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367034483.284, "dur": 39.308, + "args": { + "External id": 289675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367034531.836, "dur": 31.765, + "args": { + "External id": 289676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367034589.536, "dur": 26.330, + "args": { + "External id": 289677,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367034685.418, "dur": 18.912, + "args": { + "External id": 289678,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367034724.604, "dur": 134.411, + "args": { + "External id": 289679,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367034775.086, "dur": 12.957, + "args": { + "External id": 289680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367034780.467, "dur": 6.809, + "args": { + "External id": 289681,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367034790.632, "dur": 5.192, + "args": { + "External id": 289682,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367034797.427, "dur": 1.106, + "args": { + "External id": 289683,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367034801.256, "dur": 5.329, + "args": { + "External id": 289684,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367034869.531, "dur": 53.754, + "args": { + "External id": 289685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367034952.468, "dur": 29.684, + "args": { + "External id": 289686,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367034991.518, "dur": 42.969, + "args": { + "External id": 289687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367035043.972, "dur": 35.044, + "args": { + "External id": 289688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367035100.844, "dur": 26.694, + "args": { + "External id": 289689,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367035132.992, "dur": 62.678, + "args": { + "External id": 289690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367035220.267, "dur": 26.052, + "args": { + "External id": 289691,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2070554, "tid": 2070554, + "ts": 5333367035399.711, "dur": 73.420, + "args": { + "External id": 289692,"Record function id": 0, "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367035545.987, "dur": 45.736, + "args": { + "External id": 289693,"Record function id": 0, "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2070554, "tid": 2070554, + "ts": 5333367035600.060, "dur": 18276.032, + "args": { + "External id": 289694,"Record function id": 0, "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070554, "tid": 2070554, + "ts": 5333367035607.724, "dur": 939.691, + "args": { + "External id": 289695,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367035745.732, "dur": 9.587, + "args": { + "External id": 289696,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367035769.983, "dur": 41.676, + "args": { + "External id": 289697,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035775.868, "dur": 2.223, + "args": { + "External id": 289698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035783.244, "dur": 0.471, + "args": { + "External id": 289699,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035785.643, "dur": 0.564, + "args": { + "External id": 289700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035788.034, "dur": 0.613, + "args": { + "External id": 289701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035793.121, "dur": 0.504, + "args": { + "External id": 289702,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035795.304, "dur": 0.380, + "args": { + "External id": 289703,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035796.873, "dur": 3.480, + "args": { + "External id": 289704,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035801.844, "dur": 0.354, + "args": { + "External id": 289705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035803.654, "dur": 0.161, + "args": { + "External id": 289706,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367035822.501, "dur": 48.189, + "args": { + "External id": 289707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367035906.560, "dur": 123.436, + "args": { + "External id": 289708,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367035919.212, "dur": 4.189, + "args": { + "External id": 289709,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367035928.498, "dur": 11.035, + "args": { + "External id": 289710,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367035933.231, "dur": 5.912, + "args": { + "External id": 289711,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035937.168, "dur": 0.698, + "args": { + "External id": 289712,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367035946.024, "dur": 39.023, + "args": { + "External id": 289713,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035948.047, "dur": 2.997, + "args": { + "External id": 289714,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035952.659, "dur": 0.360, + "args": { + "External id": 289715,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035954.539, "dur": 0.346, + "args": { + "External id": 289716,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035959.083, "dur": 1.704, + "args": { + "External id": 289717,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035962.479, "dur": 0.565, + "args": { + "External id": 289718,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035969.756, "dur": 0.594, + "args": { + "External id": 289719,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035974.169, "dur": 0.404, + "args": { + "External id": 289720,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035975.787, "dur": 0.290, + "args": { + "External id": 289721,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367035977.564, "dur": 2.834, + "args": { + "External id": 289722,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367035995.949, "dur": 25.806, + "args": { + "External id": 289723,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367036085.203, "dur": 361.064, + "args": { + "External id": 289724,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367036118.580, "dur": 322.622, + "args": { + "External id": 289725,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5257, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367036130.891, "dur": 304.049, + "args": { + "External id": 289726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367036472.378, "dur": 2.662, + "args": { + "External id": 289727,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5259, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070554, "tid": 2070554, + "ts": 5333367036570.044, "dur": 17100.846, + "args": { + "External id": 289728,"Record function id": 0, "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036716.533, "dur": 6.895, + "args": { + "External id": 289729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036727.846, "dur": 1.180, + "args": { + "External id": 289730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036730.913, "dur": 2.027, + "args": { + "External id": 289731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036734.728, "dur": 1.040, + "args": { + "External id": 289732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036737.362, "dur": 0.870, + "args": { + "External id": 289733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036739.730, "dur": 0.916, + "args": { + "External id": 289734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036744.286, "dur": 0.983, + "args": { + "External id": 289735,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036746.855, "dur": 2.634, + "args": { + "External id": 289736,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036751.105, "dur": 0.505, + "args": { + "External id": 289737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367036753.188, "dur": 0.656, + "args": { + "External id": 289738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367036776.131, "dur": 16820.647, + "args": { + "External id": 289739,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367036791.873, "dur": 16797.061, + "args": { + "External id": 289740,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367036816.864, "dur": 15.333, + "args": { + "External id": 289741,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367036836.703, "dur": 16716.007, + "args": { + "External id": 289742,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367036839.252, "dur": 16712.726, + "args": { + "External id": 289743,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367036845.698, "dur": 6.748, + "args": { + "External id": 289744,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367036854.315, "dur": 16694.881, + "args": { + "External id": 289745,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367053819.540, "dur": 30.550, + "args": { + "External id": 289746,"Sequence number": 1209169, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5278 + } + }, + { + "ph": "s", "id": 63, "pid": 2070554, "tid": 2070554, "ts": 5333367053819.540, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367053836.212, "dur": 9.159, + "args": { + "External id": 289747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367053840.133, "dur": 4.983, + "args": { + "External id": 289748,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367053915.715, "dur": 97.076, + "args": { + "External id": 289749,"Record function id": 0, "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367054014.443, "dur": 1113.228, + "args": { + "External id": 289750,"Record function id": 0, "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367054057.171, "dur": 1056.866, + "args": { + "External id": 289751,"Sequence number": 1209170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5283 + } + }, + { + "ph": "s", "id": 62, "pid": 2070554, "tid": 2070554, "ts": 5333367054057.171, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367054124.265, "dur": 65.076, + "args": { + "External id": 289752,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367054206.083, "dur": 108.876, + "args": { + "External id": 289753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367054324.588, "dur": 39.817, + "args": { + "External id": 289754,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367054372.683, "dur": 32.288, + "args": { + "External id": 289755,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367054432.247, "dur": 28.252, + "args": { + "External id": 289756,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367054478.226, "dur": 15.912, + "args": { + "External id": 289757,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367054513.160, "dur": 169.408, + "args": { + "External id": 289758,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367054562.230, "dur": 12.202, + "args": { + "External id": 289759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367054567.526, "dur": 5.811, + "args": { + "External id": 289760,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367054577.168, "dur": 4.770, + "args": { + "External id": 289761,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367054583.245, "dur": 1.070, + "args": { + "External id": 289762,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367054586.808, "dur": 4.838, + "args": { + "External id": 289763,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367054695.821, "dur": 56.063, + "args": { + "External id": 289764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367054783.283, "dur": 31.904, + "args": { + "External id": 289765,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367054823.932, "dur": 42.834, + "args": { + "External id": 289766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367054876.849, "dur": 35.748, + "args": { + "External id": 289767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367054933.104, "dur": 24.366, + "args": { + "External id": 289768,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367054963.603, "dur": 34.711, + "args": { + "External id": 289769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367055020.993, "dur": 17.695, + "args": { + "External id": 289770,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2070554, "tid": 2070554, + "ts": 5333367055210.369, "dur": 75.240, + "args": { + "External id": 289771,"Record function id": 0, "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367055361.587, "dur": 48.908, + "args": { + "External id": 289772,"Record function id": 0, "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2070554, "tid": 2070554, + "ts": 5333367055419.734, "dur": 18453.440, + "args": { + "External id": 289773,"Record function id": 0, "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070554, "tid": 2070554, + "ts": 5333367055427.726, "dur": 927.495, + "args": { + "External id": 289774,"Record function id": 0, "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367055508.505, "dur": 9.167, + "args": { + "External id": 289775,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367055531.433, "dur": 40.871, + "args": { + "External id": 289776,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055537.086, "dur": 2.086, + "args": { + "External id": 289777,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055544.609, "dur": 0.243, + "args": { + "External id": 289778,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055546.290, "dur": 0.699, + "args": { + "External id": 289779,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055548.682, "dur": 0.602, + "args": { + "External id": 289780,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055553.115, "dur": 0.640, + "args": { + "External id": 289781,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055555.386, "dur": 0.558, + "args": { + "External id": 289782,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055557.240, "dur": 3.967, + "args": { + "External id": 289783,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055562.641, "dur": 0.450, + "args": { + "External id": 289784,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055564.786, "dur": 0.338, + "args": { + "External id": 289785,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367055587.226, "dur": 87.009, + "args": { + "External id": 289786,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367055716.865, "dur": 161.556, + "args": { + "External id": 289787,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367055730.610, "dur": 7.456, + "args": { + "External id": 289788,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367055743.721, "dur": 11.409, + "args": { + "External id": 289789,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367055748.552, "dur": 6.139, + "args": { + "External id": 289790,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055752.465, "dur": 0.592, + "args": { + "External id": 289791,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367055762.618, "dur": 56.676, + "args": { + "External id": 289792,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055792.213, "dur": 0.502, + "args": { + "External id": 289793,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055796.900, "dur": 0.197, + "args": { + "External id": 289794,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055798.569, "dur": 0.166, + "args": { + "External id": 289795,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055800.262, "dur": 2.847, + "args": { + "External id": 289796,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055804.570, "dur": 0.272, + "args": { + "External id": 289797,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055806.331, "dur": 0.335, + "args": { + "External id": 289798,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055810.440, "dur": 0.189, + "args": { + "External id": 289799,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055812.002, "dur": 0.219, + "args": { + "External id": 289800,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367055813.712, "dur": 0.453, + "args": { + "External id": 289801,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367055841.224, "dur": 28.680, + "args": { + "External id": 289802,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367055933.753, "dur": 321.999, + "args": { + "External id": 289803,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367055966.871, "dur": 284.050, + "args": { + "External id": 289804,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5336, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367055981.870, "dur": 262.971, + "args": { + "External id": 289805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367056280.966, "dur": 2.270, + "args": { + "External id": 289806,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5338, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070554, "tid": 2070554, + "ts": 5333367056377.744, "dur": 17293.689, + "args": { + "External id": 289807,"Record function id": 0, "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056471.674, "dur": 6.205, + "args": { + "External id": 289808,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056481.971, "dur": 0.920, + "args": { + "External id": 289809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056484.937, "dur": 2.264, + "args": { + "External id": 289810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056488.892, "dur": 0.760, + "args": { + "External id": 289811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056491.634, "dur": 0.792, + "args": { + "External id": 289812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056493.701, "dur": 1.123, + "args": { + "External id": 289813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056498.565, "dur": 0.992, + "args": { + "External id": 289814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056501.137, "dur": 2.777, + "args": { + "External id": 289815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056505.351, "dur": 0.905, + "args": { + "External id": 289816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367056507.876, "dur": 1.076, + "args": { + "External id": 289817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367056529.489, "dur": 17069.867, + "args": { + "External id": 289818,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367056545.653, "dur": 17045.030, + "args": { + "External id": 289819,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367056568.354, "dur": 14.029, + "args": { + "External id": 289820,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367056585.826, "dur": 16967.974, + "args": { + "External id": 289821,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367056588.376, "dur": 16964.694, + "args": { + "External id": 289822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367056595.015, "dur": 6.530, + "args": { + "External id": 289823,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367056603.334, "dur": 16946.170, + "args": { + "External id": 289824,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367073817.764, "dur": 29.709, + "args": { + "External id": 289825,"Sequence number": 1209171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5357 + } + }, + { + "ph": "s", "id": 61, "pid": 2070554, "tid": 2070554, "ts": 5333367073817.764, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367073833.290, "dur": 9.377, + "args": { + "External id": 289826,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367073837.195, "dur": 5.215, + "args": { + "External id": 289827,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367073912.141, "dur": 96.650, + "args": { + "External id": 289828,"Record function id": 0, "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367074010.414, "dur": 1105.911, + "args": { + "External id": 289829,"Record function id": 0, "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367074051.549, "dur": 1050.942, + "args": { + "External id": 289830,"Sequence number": 1209172, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5362 + } + }, + { + "ph": "s", "id": 60, "pid": 2070554, "tid": 2070554, "ts": 5333367074051.549, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367074117.689, "dur": 46.473, + "args": { + "External id": 289831,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367074193.527, "dur": 107.367, + "args": { + "External id": 289832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367074311.271, "dur": 39.048, + "args": { + "External id": 289833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367074359.124, "dur": 32.412, + "args": { + "External id": 289834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367074419.887, "dur": 27.955, + "args": { + "External id": 289835,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367074465.675, "dur": 16.983, + "args": { + "External id": 289836,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367074502.229, "dur": 171.760, + "args": { + "External id": 289837,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367074551.388, "dur": 11.092, + "args": { + "External id": 289838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367074556.309, "dur": 5.320, + "args": { + "External id": 289839,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367074565.405, "dur": 5.452, + "args": { + "External id": 289840,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367074572.297, "dur": 1.087, + "args": { + "External id": 289841,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367074576.068, "dur": 4.770, + "args": { + "External id": 289842,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367074686.659, "dur": 56.005, + "args": { + "External id": 289843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367074776.645, "dur": 28.839, + "args": { + "External id": 289844,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367074815.896, "dur": 43.079, + "args": { + "External id": 289845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367074866.674, "dur": 35.859, + "args": { + "External id": 289846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367074925.078, "dur": 25.351, + "args": { + "External id": 289847,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367074955.743, "dur": 35.215, + "args": { + "External id": 289848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367075010.783, "dur": 17.375, + "args": { + "External id": 289849,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2070554, "tid": 2070554, + "ts": 5333367075200.205, "dur": 79.808, + "args": { + "External id": 289850,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367075355.517, "dur": 48.267, + "args": { + "External id": 289851,"Record function id": 0, "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2070554, "tid": 2070554, + "ts": 5333367075413.121, "dur": 18318.390, + "args": { + "External id": 289852,"Record function id": 0, "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070554, "tid": 2070554, + "ts": 5333367075420.192, "dur": 913.855, + "args": { + "External id": 289853,"Record function id": 0, "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367075500.839, "dur": 9.001, + "args": { + "External id": 289854,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367075523.797, "dur": 35.500, + "args": { + "External id": 289855,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075529.235, "dur": 2.375, + "args": { + "External id": 289856,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075536.023, "dur": 0.206, + "args": { + "External id": 289857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075537.266, "dur": 0.300, + "args": { + "External id": 289858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075538.736, "dur": 0.673, + "args": { + "External id": 289859,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075542.610, "dur": 0.383, + "args": { + "External id": 289860,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075543.989, "dur": 0.381, + "args": { + "External id": 289861,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075545.636, "dur": 3.727, + "args": { + "External id": 289862,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075550.562, "dur": 0.183, + "args": { + "External id": 289863,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075552.206, "dur": 0.170, + "args": { + "External id": 289864,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367075570.137, "dur": 43.513, + "args": { + "External id": 289865,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367075690.726, "dur": 125.257, + "args": { + "External id": 289866,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367075704.338, "dur": 6.058, + "args": { + "External id": 289867,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367075715.691, "dur": 11.663, + "args": { + "External id": 289868,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367075720.357, "dur": 6.561, + "args": { + "External id": 289869,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075724.344, "dur": 0.856, + "args": { + "External id": 289870,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367075734.506, "dur": 31.982, + "args": { + "External id": 289871,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075736.536, "dur": 3.004, + "args": { + "External id": 289872,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075741.042, "dur": 0.542, + "args": { + "External id": 289873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075742.842, "dur": 0.382, + "args": { + "External id": 289874,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075747.451, "dur": 1.670, + "args": { + "External id": 289875,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075750.043, "dur": 0.212, + "args": { + "External id": 289876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075751.240, "dur": 0.482, + "args": { + "External id": 289877,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075755.415, "dur": 0.323, + "args": { + "External id": 289878,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075756.976, "dur": 0.358, + "args": { + "External id": 289879,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367075758.396, "dur": 2.379, + "args": { + "External id": 289880,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367075779.025, "dur": 28.865, + "args": { + "External id": 289881,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367075872.755, "dur": 362.953, + "args": { + "External id": 289882,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367075907.619, "dur": 322.948, + "args": { + "External id": 289883,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5415, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367075918.496, "dur": 305.118, + "args": { + "External id": 289884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367076261.395, "dur": 2.551, + "args": { + "External id": 289885,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5417, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070554, "tid": 2070554, + "ts": 5333367076356.281, "dur": 17128.004, + "args": { + "External id": 289886,"Record function id": 0, "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076451.329, "dur": 6.529, + "args": { + "External id": 289887,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076461.478, "dur": 1.127, + "args": { + "External id": 289888,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076464.762, "dur": 2.310, + "args": { + "External id": 289889,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076468.658, "dur": 0.860, + "args": { + "External id": 289890,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076471.298, "dur": 0.887, + "args": { + "External id": 289891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076473.914, "dur": 0.795, + "args": { + "External id": 289892,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076478.079, "dur": 0.944, + "args": { + "External id": 289893,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076480.435, "dur": 2.299, + "args": { + "External id": 289894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076484.530, "dur": 0.824, + "args": { + "External id": 289895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367076486.856, "dur": 0.620, + "args": { + "External id": 289896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367076507.862, "dur": 16929.124, + "args": { + "External id": 289897,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367076523.551, "dur": 16905.398, + "args": { + "External id": 289898,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367076546.443, "dur": 15.054, + "args": { + "External id": 289899,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367076564.984, "dur": 16827.649, + "args": { + "External id": 289900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367076567.431, "dur": 16824.537, + "args": { + "External id": 289901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367076573.970, "dur": 5.548, + "args": { + "External id": 289902,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367076581.059, "dur": 16807.276, + "args": { + "External id": 289903,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367093668.633, "dur": 32.211, + "args": { + "External id": 289904,"Sequence number": 1209173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5436 + } + }, + { + "ph": "s", "id": 59, "pid": 2070554, "tid": 2070554, "ts": 5333367093668.633, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367093686.068, "dur": 9.936, + "args": { + "External id": 289905,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367093690.306, "dur": 5.366, + "args": { + "External id": 289906,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367093770.999, "dur": 97.150, + "args": { + "External id": 289907,"Record function id": 0, "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367093869.977, "dur": 1112.255, + "args": { + "External id": 289908,"Record function id": 0, "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367093912.227, "dur": 1057.551, + "args": { + "External id": 289909,"Sequence number": 1209174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5441 + } + }, + { + "ph": "s", "id": 58, "pid": 2070554, "tid": 2070554, "ts": 5333367093912.227, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367093980.940, "dur": 52.099, + "args": { + "External id": 289910,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367094045.087, "dur": 109.044, + "args": { + "External id": 289911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367094164.937, "dur": 59.883, + "args": { + "External id": 289912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367094237.008, "dur": 32.132, + "args": { + "External id": 289913,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367094295.551, "dur": 29.380, + "args": { + "External id": 289914,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367094343.338, "dur": 14.293, + "args": { + "External id": 289915,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367094376.840, "dur": 130.273, + "args": { + "External id": 289916,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367094428.053, "dur": 11.049, + "args": { + "External id": 289917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367094433.358, "dur": 4.844, + "args": { + "External id": 289918,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367094441.872, "dur": 6.206, + "args": { + "External id": 289919,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367094449.402, "dur": 0.850, + "args": { + "External id": 289920,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367094452.838, "dur": 3.576, + "args": { + "External id": 289921,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367094516.753, "dur": 45.629, + "args": { + "External id": 289922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367094591.250, "dur": 27.597, + "args": { + "External id": 289923,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367094669.795, "dur": 47.674, + "args": { + "External id": 289924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367094733.471, "dur": 36.942, + "args": { + "External id": 289925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367094796.972, "dur": 26.255, + "args": { + "External id": 289926,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367094830.353, "dur": 35.076, + "args": { + "External id": 289927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367094882.420, "dur": 20.412, + "args": { + "External id": 289928,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2070554, "tid": 2070554, + "ts": 5333367095047.661, "dur": 74.186, + "args": { + "External id": 289929,"Record function id": 0, "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367095211.341, "dur": 47.236, + "args": { + "External id": 289930,"Record function id": 0, "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2070554, "tid": 2070554, + "ts": 5333367095268.106, "dur": 18248.224, + "args": { + "External id": 289931,"Record function id": 0, "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070554, "tid": 2070554, + "ts": 5333367095276.657, "dur": 831.083, + "args": { + "External id": 289932,"Record function id": 0, "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367095357.096, "dur": 9.520, + "args": { + "External id": 289933,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367095379.880, "dur": 34.437, + "args": { + "External id": 289934,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095385.445, "dur": 2.433, + "args": { + "External id": 289935,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095392.820, "dur": 0.322, + "args": { + "External id": 289936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095394.563, "dur": 0.398, + "args": { + "External id": 289937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095396.008, "dur": 0.341, + "args": { + "External id": 289938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095399.538, "dur": 0.304, + "args": { + "External id": 289939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095400.867, "dur": 0.470, + "args": { + "External id": 289940,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095402.231, "dur": 2.843, + "args": { + "External id": 289941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095406.483, "dur": 0.215, + "args": { + "External id": 289942,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095407.649, "dur": 0.165, + "args": { + "External id": 289943,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367095426.514, "dur": 42.792, + "args": { + "External id": 289944,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367095502.676, "dur": 156.411, + "args": { + "External id": 289945,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367095514.607, "dur": 3.594, + "args": { + "External id": 289946,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367095522.699, "dur": 13.603, + "args": { + "External id": 289947,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367095527.393, "dur": 8.458, + "args": { + "External id": 289948,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095533.893, "dur": 0.441, + "args": { + "External id": 289949,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367095542.638, "dur": 30.760, + "args": { + "External id": 289950,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095544.587, "dur": 3.341, + "args": { + "External id": 289951,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095549.441, "dur": 0.243, + "args": { + "External id": 289952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095550.846, "dur": 0.232, + "args": { + "External id": 289953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095554.759, "dur": 1.495, + "args": { + "External id": 289954,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095557.262, "dur": 0.369, + "args": { + "External id": 289955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095558.276, "dur": 0.385, + "args": { + "External id": 289956,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095562.329, "dur": 0.452, + "args": { + "External id": 289957,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095564.064, "dur": 0.399, + "args": { + "External id": 289958,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367095565.451, "dur": 2.533, + "args": { + "External id": 289959,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367095587.242, "dur": 26.233, + "args": { + "External id": 289960,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367095719.564, "dur": 298.567, + "args": { + "External id": 289961,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367095754.340, "dur": 259.362, + "args": { + "External id": 289962,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5494, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367095764.952, "dur": 243.318, + "args": { + "External id": 289963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367096039.558, "dur": 2.296, + "args": { + "External id": 289964,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5496, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070554, "tid": 2070554, + "ts": 5333367096128.087, "dur": 17186.793, + "args": { + "External id": 289965,"Record function id": 0, "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096246.535, "dur": 7.047, + "args": { + "External id": 289966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096257.768, "dur": 0.971, + "args": { + "External id": 289967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096260.648, "dur": 1.843, + "args": { + "External id": 289968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096264.372, "dur": 0.477, + "args": { + "External id": 289969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096266.299, "dur": 0.756, + "args": { + "External id": 289970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096268.314, "dur": 1.308, + "args": { + "External id": 289971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096274.035, "dur": 1.013, + "args": { + "External id": 289972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096276.423, "dur": 2.082, + "args": { + "External id": 289973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096279.824, "dur": 0.935, + "args": { + "External id": 289974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367096282.136, "dur": 0.854, + "args": { + "External id": 289975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367096304.115, "dur": 16962.385, + "args": { + "External id": 289976,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367096320.169, "dur": 16938.791, + "args": { + "External id": 289977,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367096341.829, "dur": 14.063, + "args": { + "External id": 289978,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367096359.663, "dur": 16861.813, + "args": { + "External id": 289979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367096362.558, "dur": 16858.229, + "args": { + "External id": 289980,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367096368.745, "dur": 7.116, + "args": { + "External id": 289981,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367096377.333, "dur": 16839.972, + "args": { + "External id": 289982,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367113459.599, "dur": 31.736, + "args": { + "External id": 289983,"Sequence number": 1209175, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5515 + } + }, + { + "ph": "s", "id": 57, "pid": 2070554, "tid": 2070554, "ts": 5333367113459.599, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367113478.089, "dur": 8.643, + "args": { + "External id": 289984,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367113481.817, "dur": 4.697, + "args": { + "External id": 289985,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367113557.047, "dur": 145.002, + "args": { + "External id": 289986,"Record function id": 0, "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367113705.723, "dur": 1113.683, + "args": { + "External id": 289987,"Record function id": 0, "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367113750.510, "dur": 1055.299, + "args": { + "External id": 289988,"Sequence number": 1209176, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5520 + } + }, + { + "ph": "s", "id": 56, "pid": 2070554, "tid": 2070554, "ts": 5333367113750.510, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367113821.786, "dur": 49.278, + "args": { + "External id": 289989,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367113884.351, "dur": 105.911, + "args": { + "External id": 289990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367113999.994, "dur": 39.751, + "args": { + "External id": 289991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367114047.997, "dur": 31.950, + "args": { + "External id": 289992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367114105.678, "dur": 27.225, + "args": { + "External id": 289993,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367114152.690, "dur": 30.248, + "args": { + "External id": 289994,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367114204.855, "dur": 131.075, + "args": { + "External id": 289995,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367114254.400, "dur": 11.352, + "args": { + "External id": 289996,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367114259.338, "dur": 5.379, + "args": { + "External id": 289997,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367114268.336, "dur": 5.863, + "args": { + "External id": 289998,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367114275.564, "dur": 1.049, + "args": { + "External id": 289999,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367114279.231, "dur": 4.901, + "args": { + "External id": 290000,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367114346.135, "dur": 49.412, + "args": { + "External id": 290001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367114422.842, "dur": 28.340, + "args": { + "External id": 290002,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367114460.871, "dur": 42.388, + "args": { + "External id": 290003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367114511.909, "dur": 35.678, + "args": { + "External id": 290004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367114570.242, "dur": 25.572, + "args": { + "External id": 290005,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367114601.193, "dur": 82.538, + "args": { + "External id": 290006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367114708.991, "dur": 21.081, + "args": { + "External id": 290007,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2070554, "tid": 2070554, + "ts": 5333367114886.064, "dur": 73.304, + "args": { + "External id": 290008,"Record function id": 0, "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367115032.357, "dur": 45.597, + "args": { + "External id": 290009,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2070554, "tid": 2070554, + "ts": 5333367115087.420, "dur": 18274.342, + "args": { + "External id": 290010,"Record function id": 0, "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070554, "tid": 2070554, + "ts": 5333367115096.169, "dur": 844.166, + "args": { + "External id": 290011,"Record function id": 0, "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367115192.521, "dur": 10.295, + "args": { + "External id": 290012,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367115217.185, "dur": 36.574, + "args": { + "External id": 290013,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115222.386, "dur": 2.526, + "args": { + "External id": 290014,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115229.456, "dur": 0.237, + "args": { + "External id": 290015,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115231.344, "dur": 0.420, + "args": { + "External id": 290016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115232.597, "dur": 0.700, + "args": { + "External id": 290017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115236.664, "dur": 0.557, + "args": { + "External id": 290018,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115238.389, "dur": 0.241, + "args": { + "External id": 290019,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115239.457, "dur": 3.528, + "args": { + "External id": 290020,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115244.405, "dur": 0.570, + "args": { + "External id": 290021,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115246.327, "dur": 0.384, + "args": { + "External id": 290022,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367115264.572, "dur": 43.982, + "args": { + "External id": 290023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367115343.756, "dur": 111.727, + "args": { + "External id": 290024,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367115356.536, "dur": 3.622, + "args": { + "External id": 290025,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367115365.414, "dur": 10.839, + "args": { + "External id": 290026,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367115370.132, "dur": 5.657, + "args": { + "External id": 290027,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115374.000, "dur": 0.435, + "args": { + "External id": 290028,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367115382.763, "dur": 29.401, + "args": { + "External id": 290029,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115385.018, "dur": 3.375, + "args": { + "External id": 290030,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115389.598, "dur": 0.434, + "args": { + "External id": 290031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115391.107, "dur": 0.246, + "args": { + "External id": 290032,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115394.652, "dur": 1.635, + "args": { + "External id": 290033,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115397.162, "dur": 0.365, + "args": { + "External id": 290034,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115398.424, "dur": 0.509, + "args": { + "External id": 290035,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115401.763, "dur": 0.163, + "args": { + "External id": 290036,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115403.063, "dur": 0.396, + "args": { + "External id": 290037,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367115404.537, "dur": 3.061, + "args": { + "External id": 290038,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367115422.158, "dur": 24.935, + "args": { + "External id": 290039,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367115508.913, "dur": 338.321, + "args": { + "External id": 290040,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367115543.087, "dur": 298.653, + "args": { + "External id": 290041,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5573, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367115553.885, "dur": 281.219, + "args": { + "External id": 290042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367115871.682, "dur": 2.514, + "args": { + "External id": 290043,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5575, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070554, "tid": 2070554, + "ts": 5333367115962.028, "dur": 17190.176, + "args": { + "External id": 290044,"Record function id": 0, "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116060.966, "dur": 6.340, + "args": { + "External id": 290045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116071.255, "dur": 0.981, + "args": { + "External id": 290046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116074.172, "dur": 1.978, + "args": { + "External id": 290047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116078.113, "dur": 1.160, + "args": { + "External id": 290048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116080.516, "dur": 0.972, + "args": { + "External id": 290049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116082.897, "dur": 0.884, + "args": { + "External id": 290050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116087.909, "dur": 1.212, + "args": { + "External id": 290051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116090.494, "dur": 3.060, + "args": { + "External id": 290052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116095.145, "dur": 1.026, + "args": { + "External id": 290053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367116097.513, "dur": 0.681, + "args": { + "External id": 290054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367116118.355, "dur": 16986.373, + "args": { + "External id": 290055,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367116133.467, "dur": 16963.680, + "args": { + "External id": 290056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367116154.435, "dur": 30.978, + "args": { + "External id": 290057,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367116190.074, "dur": 16870.955, + "args": { + "External id": 290058,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367116192.631, "dur": 16867.815, + "args": { + "External id": 290059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367116199.527, "dur": 6.137, + "args": { + "External id": 290060,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367116207.206, "dur": 16850.045, + "args": { + "External id": 290061,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367133306.628, "dur": 30.711, + "args": { + "External id": 290062,"Sequence number": 1209177, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5594 + } + }, + { + "ph": "s", "id": 55, "pid": 2070554, "tid": 2070554, "ts": 5333367133306.628, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367133322.884, "dur": 9.752, + "args": { + "External id": 290063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367133327.093, "dur": 5.208, + "args": { + "External id": 290064,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367133402.975, "dur": 98.044, + "args": { + "External id": 290065,"Record function id": 0, "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367133503.147, "dur": 1167.638, + "args": { + "External id": 290066,"Record function id": 0, "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367133544.045, "dur": 1111.616, + "args": { + "External id": 290067,"Sequence number": 1209178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5599 + } + }, + { + "ph": "s", "id": 54, "pid": 2070554, "tid": 2070554, "ts": 5333367133544.045, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367133610.106, "dur": 96.781, + "args": { + "External id": 290068,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367133723.580, "dur": 107.521, + "args": { + "External id": 290069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367133843.439, "dur": 40.805, + "args": { + "External id": 290070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367133893.060, "dur": 31.376, + "args": { + "External id": 290071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367133952.289, "dur": 28.728, + "args": { + "External id": 290072,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367133999.809, "dur": 15.673, + "args": { + "External id": 290073,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367134034.973, "dur": 154.579, + "args": { + "External id": 290074,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367134092.029, "dur": 10.594, + "args": { + "External id": 290075,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367134097.139, "dur": 4.784, + "args": { + "External id": 290076,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367134105.354, "dur": 5.049, + "args": { + "External id": 290077,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367134111.683, "dur": 1.480, + "args": { + "External id": 290078,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367134115.572, "dur": 4.741, + "args": { + "External id": 290079,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367134202.187, "dur": 55.314, + "args": { + "External id": 290080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367134290.243, "dur": 32.619, + "args": { + "External id": 290081,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367134330.372, "dur": 43.292, + "args": { + "External id": 290082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367134385.045, "dur": 36.528, + "args": { + "External id": 290083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367134442.359, "dur": 27.124, + "args": { + "External id": 290084,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367134475.004, "dur": 34.947, + "args": { + "External id": 290085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367134532.838, "dur": 18.365, + "args": { + "External id": 290086,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2070554, "tid": 2070554, + "ts": 5333367134738.693, "dur": 77.042, + "args": { + "External id": 290087,"Record function id": 0, "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367134887.892, "dur": 47.170, + "args": { + "External id": 290088,"Record function id": 0, "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2070554, "tid": 2070554, + "ts": 5333367134944.203, "dur": 18281.269, + "args": { + "External id": 290089,"Record function id": 0, "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070554, "tid": 2070554, + "ts": 5333367134951.542, "dur": 869.858, + "args": { + "External id": 290090,"Record function id": 0, "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367135033.207, "dur": 9.452, + "args": { + "External id": 290091,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367135057.703, "dur": 41.213, + "args": { + "External id": 290092,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135063.352, "dur": 2.441, + "args": { + "External id": 290093,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135071.115, "dur": 0.376, + "args": { + "External id": 290094,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135073.218, "dur": 0.731, + "args": { + "External id": 290095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135075.594, "dur": 0.391, + "args": { + "External id": 290096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135079.967, "dur": 0.479, + "args": { + "External id": 290097,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135081.843, "dur": 0.397, + "args": { + "External id": 290098,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135083.840, "dur": 4.202, + "args": { + "External id": 290099,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135089.759, "dur": 0.407, + "args": { + "External id": 290100,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135091.809, "dur": 0.394, + "args": { + "External id": 290101,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367135109.846, "dur": 43.466, + "args": { + "External id": 290102,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367135206.696, "dur": 131.165, + "args": { + "External id": 290103,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367135220.596, "dur": 5.561, + "args": { + "External id": 290104,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367135231.308, "dur": 11.728, + "args": { + "External id": 290105,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367135235.971, "dur": 6.623, + "args": { + "External id": 290106,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135240.034, "dur": 0.864, + "args": { + "External id": 290107,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367135250.293, "dur": 35.883, + "args": { + "External id": 290108,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135252.632, "dur": 3.290, + "args": { + "External id": 290109,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135257.433, "dur": 0.627, + "args": { + "External id": 290110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135259.872, "dur": 0.458, + "args": { + "External id": 290111,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135264.493, "dur": 1.332, + "args": { + "External id": 290112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135269.066, "dur": 0.275, + "args": { + "External id": 290113,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135270.787, "dur": 0.328, + "args": { + "External id": 290114,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135275.255, "dur": 0.463, + "args": { + "External id": 290115,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135277.191, "dur": 0.414, + "args": { + "External id": 290116,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367135278.803, "dur": 2.588, + "args": { + "External id": 290117,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367135300.901, "dur": 28.040, + "args": { + "External id": 290118,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367135391.960, "dur": 333.288, + "args": { + "External id": 290119,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367135424.504, "dur": 295.427, + "args": { + "External id": 290120,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5652, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367135435.663, "dur": 277.919, + "args": { + "External id": 290121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367135749.435, "dur": 2.508, + "args": { + "External id": 290122,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5654, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070554, "tid": 2070554, + "ts": 5333367135842.818, "dur": 17170.953, + "args": { + "External id": 290123,"Record function id": 0, "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135938.077, "dur": 6.466, + "args": { + "External id": 290124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135947.634, "dur": 0.853, + "args": { + "External id": 290125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135950.090, "dur": 2.288, + "args": { + "External id": 290126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135954.307, "dur": 0.730, + "args": { + "External id": 290127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135956.378, "dur": 0.963, + "args": { + "External id": 290128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135958.553, "dur": 0.923, + "args": { + "External id": 290129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135963.457, "dur": 1.053, + "args": { + "External id": 290130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135966.321, "dur": 2.281, + "args": { + "External id": 290131,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135970.320, "dur": 0.853, + "args": { + "External id": 290132,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367135972.772, "dur": 1.325, + "args": { + "External id": 290133,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367135995.978, "dur": 16974.763, + "args": { + "External id": 290134,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367136011.643, "dur": 16951.193, + "args": { + "External id": 290135,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367136031.542, "dur": 15.659, + "args": { + "External id": 290136,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367136050.685, "dur": 16877.720, + "args": { + "External id": 290137,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367136053.437, "dur": 16874.213, + "args": { + "External id": 290138,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367136060.146, "dur": 6.700, + "args": { + "External id": 290139,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367136068.733, "dur": 16855.604, + "args": { + "External id": 290140,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367153154.181, "dur": 44.754, + "args": { + "External id": 290141,"Sequence number": 1209179, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5673 + } + }, + { + "ph": "s", "id": 53, "pid": 2070554, "tid": 2070554, "ts": 5333367153154.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367153183.888, "dur": 10.134, + "args": { + "External id": 290142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367153188.137, "dur": 5.554, + "args": { + "External id": 290143,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367153264.552, "dur": 95.829, + "args": { + "External id": 290144,"Record function id": 0, "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367153362.256, "dur": 1091.692, + "args": { + "External id": 290145,"Record function id": 0, "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367153405.505, "dur": 1035.691, + "args": { + "External id": 290146,"Sequence number": 1209180, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5678 + } + }, + { + "ph": "s", "id": 52, "pid": 2070554, "tid": 2070554, "ts": 5333367153405.505, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367153472.993, "dur": 47.323, + "args": { + "External id": 290147,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367153533.666, "dur": 130.787, + "args": { + "External id": 290148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367153678.515, "dur": 44.429, + "args": { + "External id": 290149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367153732.440, "dur": 31.835, + "args": { + "External id": 290150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367153792.125, "dur": 29.643, + "args": { + "External id": 290151,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367153839.098, "dur": 18.275, + "args": { + "External id": 290152,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367153881.008, "dur": 132.488, + "args": { + "External id": 290153,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367153930.755, "dur": 10.988, + "args": { + "External id": 290154,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367153936.067, "dur": 4.942, + "args": { + "External id": 290155,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367153944.637, "dur": 6.605, + "args": { + "External id": 290156,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367153952.648, "dur": 1.181, + "args": { + "External id": 290157,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367153956.074, "dur": 3.963, + "args": { + "External id": 290158,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367154023.170, "dur": 45.467, + "args": { + "External id": 290159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367154098.819, "dur": 28.386, + "args": { + "External id": 290160,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367154135.202, "dur": 57.524, + "args": { + "External id": 290161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367154204.380, "dur": 39.078, + "args": { + "External id": 290162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367154265.315, "dur": 24.845, + "args": { + "External id": 290163,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367154295.628, "dur": 35.836, + "args": { + "External id": 290164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367154353.589, "dur": 18.719, + "args": { + "External id": 290165,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2070554, "tid": 2070554, + "ts": 5333367154519.659, "dur": 76.528, + "args": { + "External id": 290166,"Record function id": 0, "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367154712.217, "dur": 48.171, + "args": { + "External id": 290167,"Record function id": 0, "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2070554, "tid": 2070554, + "ts": 5333367154770.215, "dur": 18278.044, + "args": { + "External id": 290168,"Record function id": 0, "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070554, "tid": 2070554, + "ts": 5333367154779.235, "dur": 816.358, + "args": { + "External id": 290169,"Record function id": 0, "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367154862.417, "dur": 9.058, + "args": { + "External id": 290170,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367154885.425, "dur": 39.519, + "args": { + "External id": 290171,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154891.289, "dur": 2.334, + "args": { + "External id": 290172,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154898.326, "dur": 0.614, + "args": { + "External id": 290173,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154900.445, "dur": 0.467, + "args": { + "External id": 290174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154902.423, "dur": 0.457, + "args": { + "External id": 290175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154906.670, "dur": 0.326, + "args": { + "External id": 290176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154908.654, "dur": 0.425, + "args": { + "External id": 290177,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154910.459, "dur": 4.093, + "args": { + "External id": 290178,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154916.372, "dur": 0.378, + "args": { + "External id": 290179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367154918.115, "dur": 0.598, + "args": { + "External id": 290180,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367154936.133, "dur": 44.802, + "args": { + "External id": 290181,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367155014.684, "dur": 120.328, + "args": { + "External id": 290182,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367155027.591, "dur": 4.068, + "args": { + "External id": 290183,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367155036.811, "dur": 10.653, + "args": { + "External id": 290184,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367155041.497, "dur": 5.523, + "args": { + "External id": 290185,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155045.098, "dur": 0.572, + "args": { + "External id": 290186,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367155054.716, "dur": 33.773, + "args": { + "External id": 290187,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155057.195, "dur": 2.880, + "args": { + "External id": 290188,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155061.649, "dur": 0.214, + "args": { + "External id": 290189,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155063.443, "dur": 0.396, + "args": { + "External id": 290190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155068.569, "dur": 1.444, + "args": { + "External id": 290191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155071.282, "dur": 0.393, + "args": { + "External id": 290192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155073.408, "dur": 0.429, + "args": { + "External id": 290193,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155076.975, "dur": 0.340, + "args": { + "External id": 290194,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155078.493, "dur": 0.465, + "args": { + "External id": 290195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155080.299, "dur": 3.196, + "args": { + "External id": 290196,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367155099.751, "dur": 27.226, + "args": { + "External id": 290197,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367155209.253, "dur": 294.381, + "args": { + "External id": 290198,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367155243.277, "dur": 255.677, + "args": { + "External id": 290199,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5731, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367155254.478, "dur": 238.743, + "args": { + "External id": 290200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367155526.335, "dur": 2.631, + "args": { + "External id": 290201,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5733, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070554, "tid": 2070554, + "ts": 5333367155616.506, "dur": 17228.142, + "args": { + "External id": 290202,"Record function id": 0, "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155756.595, "dur": 7.089, + "args": { + "External id": 290203,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155767.261, "dur": 1.132, + "args": { + "External id": 290204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155770.051, "dur": 2.145, + "args": { + "External id": 290205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155774.079, "dur": 0.938, + "args": { + "External id": 290206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155776.492, "dur": 0.997, + "args": { + "External id": 290207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155778.974, "dur": 1.115, + "args": { + "External id": 290208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155783.537, "dur": 0.897, + "args": { + "External id": 290209,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155786.223, "dur": 2.610, + "args": { + "External id": 290210,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155790.293, "dur": 0.868, + "args": { + "External id": 290211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367155792.701, "dur": 0.947, + "args": { + "External id": 290212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367155815.640, "dur": 16980.814, + "args": { + "External id": 290213,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367155832.464, "dur": 16956.025, + "args": { + "External id": 290214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367155857.038, "dur": 15.470, + "args": { + "External id": 290215,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367155876.108, "dur": 16874.317, + "args": { + "External id": 290216,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367155878.932, "dur": 16870.810, + "args": { + "External id": 290217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367155885.425, "dur": 5.303, + "args": { + "External id": 290218,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367155892.659, "dur": 16853.498, + "args": { + "External id": 290219,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367172991.045, "dur": 32.218, + "args": { + "External id": 290220,"Sequence number": 1209181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5752 + } + }, + { + "ph": "s", "id": 51, "pid": 2070554, "tid": 2070554, "ts": 5333367172991.045, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367173009.270, "dur": 9.302, + "args": { + "External id": 290221,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367173013.253, "dur": 5.109, + "args": { + "External id": 290222,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367173087.851, "dur": 110.303, + "args": { + "External id": 290223,"Record function id": 0, "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367173201.771, "dur": 1124.032, + "args": { + "External id": 290224,"Record function id": 0, "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367173246.048, "dur": 1066.248, + "args": { + "External id": 290225,"Sequence number": 1209182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5757 + } + }, + { + "ph": "s", "id": 50, "pid": 2070554, "tid": 2070554, "ts": 5333367173246.048, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367173314.208, "dur": 57.314, + "args": { + "External id": 290226,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367173384.560, "dur": 105.447, + "args": { + "External id": 290227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367173501.403, "dur": 37.537, + "args": { + "External id": 290228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367173547.084, "dur": 31.134, + "args": { + "External id": 290229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367173607.093, "dur": 70.090, + "args": { + "External id": 290230,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367173698.143, "dur": 17.855, + "args": { + "External id": 290231,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367173735.286, "dur": 135.097, + "args": { + "External id": 290232,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367173786.276, "dur": 12.074, + "args": { + "External id": 290233,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367173791.504, "dur": 5.954, + "args": { + "External id": 290234,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367173801.272, "dur": 5.566, + "args": { + "External id": 290235,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367173808.409, "dur": 1.201, + "args": { + "External id": 290236,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367173812.385, "dur": 5.572, + "args": { + "External id": 290237,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367173881.109, "dur": 52.896, + "args": { + "External id": 290238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367173965.725, "dur": 29.698, + "args": { + "External id": 290239,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367174004.025, "dur": 42.465, + "args": { + "External id": 290240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367174052.184, "dur": 35.403, + "args": { + "External id": 290241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367174111.627, "dur": 26.872, + "args": { + "External id": 290242,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367174146.880, "dur": 50.621, + "args": { + "External id": 290243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367174220.382, "dur": 20.458, + "args": { + "External id": 290244,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2070554, "tid": 2070554, + "ts": 5333367174391.982, "dur": 74.899, + "args": { + "External id": 290245,"Record function id": 0, "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367174539.531, "dur": 46.247, + "args": { + "External id": 290246,"Record function id": 0, "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2070554, "tid": 2070554, + "ts": 5333367174595.463, "dur": 18345.044, + "args": { + "External id": 290247,"Record function id": 0, "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070554, "tid": 2070554, + "ts": 5333367174603.637, "dur": 920.713, + "args": { + "External id": 290248,"Record function id": 0, "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367174721.358, "dur": 9.259, + "args": { + "External id": 290249,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367174745.129, "dur": 41.479, + "args": { + "External id": 290250,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174750.894, "dur": 2.620, + "args": { + "External id": 290251,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174758.537, "dur": 0.217, + "args": { + "External id": 290252,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174760.626, "dur": 0.335, + "args": { + "External id": 290253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174762.515, "dur": 0.644, + "args": { + "External id": 290254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174767.638, "dur": 0.636, + "args": { + "External id": 290255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174769.668, "dur": 0.434, + "args": { + "External id": 290256,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174771.525, "dur": 3.480, + "args": { + "External id": 290257,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174776.637, "dur": 0.379, + "args": { + "External id": 290258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174778.728, "dur": 0.205, + "args": { + "External id": 290259,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367174798.978, "dur": 45.129, + "args": { + "External id": 290260,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367174879.156, "dur": 127.284, + "args": { + "External id": 290261,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367174893.033, "dur": 3.968, + "args": { + "External id": 290262,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367174901.842, "dur": 10.562, + "args": { + "External id": 290263,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367174906.293, "dur": 5.674, + "args": { + "External id": 290264,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174909.992, "dur": 0.509, + "args": { + "External id": 290265,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367174919.738, "dur": 33.975, + "args": { + "External id": 290266,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174921.959, "dur": 2.940, + "args": { + "External id": 290267,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174926.535, "dur": 0.220, + "args": { + "External id": 290268,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174928.253, "dur": 0.386, + "args": { + "External id": 290269,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174933.008, "dur": 2.123, + "args": { + "External id": 290270,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174936.780, "dur": 0.162, + "args": { + "External id": 290271,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174938.355, "dur": 0.489, + "args": { + "External id": 290272,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174942.253, "dur": 0.358, + "args": { + "External id": 290273,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174943.877, "dur": 0.481, + "args": { + "External id": 290274,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367174945.948, "dur": 2.842, + "args": { + "External id": 290275,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367174971.374, "dur": 25.616, + "args": { + "External id": 290276,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367175058.930, "dur": 365.671, + "args": { + "External id": 290277,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367175101.045, "dur": 318.490, + "args": { + "External id": 290278,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5810, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367175112.273, "dur": 298.770, + "args": { + "External id": 290279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367175450.165, "dur": 2.301, + "args": { + "External id": 290280,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5812, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070554, "tid": 2070554, + "ts": 5333367175547.264, "dur": 17194.896, + "args": { + "External id": 290281,"Record function id": 0, "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175685.950, "dur": 6.555, + "args": { + "External id": 290282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175696.346, "dur": 1.169, + "args": { + "External id": 290283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175699.331, "dur": 2.225, + "args": { + "External id": 290284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175703.318, "dur": 1.039, + "args": { + "External id": 290285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175705.810, "dur": 0.585, + "args": { + "External id": 290286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175709.684, "dur": 0.844, + "args": { + "External id": 290287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175712.510, "dur": 1.000, + "args": { + "External id": 290288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175714.966, "dur": 2.596, + "args": { + "External id": 290289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175719.269, "dur": 0.899, + "args": { + "External id": 290290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367175723.656, "dur": 1.089, + "args": { + "External id": 290291,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367175745.190, "dur": 16952.286, + "args": { + "External id": 290292,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367175763.232, "dur": 16925.804, + "args": { + "External id": 290293,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367175784.479, "dur": 16.227, + "args": { + "External id": 290294,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367175804.544, "dur": 16845.517, + "args": { + "External id": 290295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367175807.112, "dur": 16842.195, + "args": { + "External id": 290296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367175813.890, "dur": 6.204, + "args": { + "External id": 290297,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367175821.790, "dur": 16824.206, + "args": { + "External id": 290298,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367192881.621, "dur": 34.271, + "args": { + "External id": 290299,"Sequence number": 1209183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5831 + } + }, + { + "ph": "s", "id": 49, "pid": 2070554, "tid": 2070554, "ts": 5333367192881.621, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367192901.989, "dur": 9.014, + "args": { + "External id": 290300,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367192905.880, "dur": 4.932, + "args": { + "External id": 290301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367192980.110, "dur": 95.825, + "args": { + "External id": 290302,"Record function id": 0, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367193077.748, "dur": 1137.944, + "args": { + "External id": 290303,"Record function id": 0, "Ev Idx": 5835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367193116.347, "dur": 1085.413, + "args": { + "External id": 290304,"Sequence number": 1209184, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5836 + } + }, + { + "ph": "s", "id": 48, "pid": 2070554, "tid": 2070554, "ts": 5333367193116.347, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367193226.508, "dur": 50.837, + "args": { + "External id": 290305,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367193295.273, "dur": 106.836, + "args": { + "External id": 290306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367193412.709, "dur": 39.778, + "args": { + "External id": 290307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367193458.412, "dur": 30.760, + "args": { + "External id": 290308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367193518.436, "dur": 26.126, + "args": { + "External id": 290309,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367193559.317, "dur": 16.084, + "args": { + "External id": 290310,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367193593.390, "dur": 177.751, + "args": { + "External id": 290311,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367193687.387, "dur": 12.780, + "args": { + "External id": 290312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367193692.834, "dur": 6.055, + "args": { + "External id": 290313,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367193703.105, "dur": 4.872, + "args": { + "External id": 290314,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367193709.499, "dur": 3.067, + "args": { + "External id": 290315,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367193715.119, "dur": 4.814, + "args": { + "External id": 290316,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367193782.291, "dur": 51.330, + "args": { + "External id": 290317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367193864.009, "dur": 28.884, + "args": { + "External id": 290318,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367193902.433, "dur": 42.478, + "args": { + "External id": 290319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367193952.126, "dur": 34.688, + "args": { + "External id": 290320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367194011.812, "dur": 26.086, + "args": { + "External id": 290321,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367194043.271, "dur": 35.019, + "args": { + "External id": 290322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367194096.967, "dur": 22.625, + "args": { + "External id": 290323,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2070554, "tid": 2070554, + "ts": 5333367194282.955, "dur": 74.742, + "args": { + "External id": 290324,"Record function id": 0, "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367194431.751, "dur": 47.834, + "args": { + "External id": 290325,"Record function id": 0, "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2070554, "tid": 2070554, + "ts": 5333367194488.759, "dur": 18344.502, + "args": { + "External id": 290326,"Record function id": 0, "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070554, "tid": 2070554, + "ts": 5333367194497.166, "dur": 866.122, + "args": { + "External id": 290327,"Record function id": 0, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367194577.209, "dur": 8.334, + "args": { + "External id": 290328,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367194598.667, "dur": 77.160, + "args": { + "External id": 290329,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194604.034, "dur": 2.051, + "args": { + "External id": 290330,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194610.844, "dur": 0.407, + "args": { + "External id": 290331,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194612.737, "dur": 0.636, + "args": { + "External id": 290332,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194615.133, "dur": 0.425, + "args": { + "External id": 290333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194656.321, "dur": 0.638, + "args": { + "External id": 290334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194658.988, "dur": 0.420, + "args": { + "External id": 290335,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194661.179, "dur": 3.686, + "args": { + "External id": 290336,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194666.437, "dur": 0.574, + "args": { + "External id": 290337,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194668.473, "dur": 0.326, + "args": { + "External id": 290338,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367194687.854, "dur": 49.098, + "args": { + "External id": 290339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367194773.793, "dur": 122.459, + "args": { + "External id": 290340,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367194787.549, "dur": 5.558, + "args": { + "External id": 290341,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367194798.328, "dur": 10.848, + "args": { + "External id": 290342,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367194802.912, "dur": 5.816, + "args": { + "External id": 290343,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194806.600, "dur": 0.842, + "args": { + "External id": 290344,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367194816.678, "dur": 34.005, + "args": { + "External id": 290345,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194818.930, "dur": 2.987, + "args": { + "External id": 290346,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194823.522, "dur": 0.408, + "args": { + "External id": 290347,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194825.572, "dur": 0.193, + "args": { + "External id": 290348,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194830.177, "dur": 1.406, + "args": { + "External id": 290349,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194832.999, "dur": 0.283, + "args": { + "External id": 290350,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194834.874, "dur": 0.465, + "args": { + "External id": 290351,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194838.775, "dur": 0.327, + "args": { + "External id": 290352,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194840.471, "dur": 0.477, + "args": { + "External id": 290353,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367194842.468, "dur": 2.654, + "args": { + "External id": 290354,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367194862.848, "dur": 25.433, + "args": { + "External id": 290355,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367194952.037, "dur": 315.725, + "args": { + "External id": 290356,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367194986.021, "dur": 276.516, + "args": { + "External id": 290357,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5889, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367194996.643, "dur": 259.736, + "args": { + "External id": 290358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367195292.059, "dur": 2.611, + "args": { + "External id": 290359,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5891, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070554, "tid": 2070554, + "ts": 5333367195385.216, "dur": 17217.806, + "args": { + "External id": 290360,"Record function id": 0, "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195482.316, "dur": 6.529, + "args": { + "External id": 290361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195492.395, "dur": 1.064, + "args": { + "External id": 290362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195495.078, "dur": 2.010, + "args": { + "External id": 290363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195498.681, "dur": 0.954, + "args": { + "External id": 290364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195500.939, "dur": 0.762, + "args": { + "External id": 290365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195502.842, "dur": 0.706, + "args": { + "External id": 290366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195507.190, "dur": 0.778, + "args": { + "External id": 290367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195509.443, "dur": 2.835, + "args": { + "External id": 290368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195513.848, "dur": 0.662, + "args": { + "External id": 290369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367195516.009, "dur": 0.961, + "args": { + "External id": 290370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367195538.620, "dur": 17017.226, + "args": { + "External id": 290371,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367195553.949, "dur": 16993.136, + "args": { + "External id": 290372,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367195582.652, "dur": 15.191, + "args": { + "External id": 290373,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367195601.748, "dur": 16908.250, + "args": { + "External id": 290374,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367195604.018, "dur": 16905.260, + "args": { + "External id": 290375,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367195611.071, "dur": 5.289, + "args": { + "External id": 290376,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367195618.079, "dur": 16887.466, + "args": { + "External id": 290377,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367212771.798, "dur": 35.457, + "args": { + "External id": 290378,"Sequence number": 1209185, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5910 + } + }, + { + "ph": "s", "id": 47, "pid": 2070554, "tid": 2070554, "ts": 5333367212771.798, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367212792.217, "dur": 10.382, + "args": { + "External id": 290379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367212796.710, "dur": 5.362, + "args": { + "External id": 290380,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367212874.224, "dur": 95.866, + "args": { + "External id": 290381,"Record function id": 0, "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367212972.167, "dur": 1110.417, + "args": { + "External id": 290382,"Record function id": 0, "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367213011.706, "dur": 1057.461, + "args": { + "External id": 290383,"Sequence number": 1209186, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5915 + } + }, + { + "ph": "s", "id": 46, "pid": 2070554, "tid": 2070554, "ts": 5333367213011.706, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367213083.291, "dur": 48.032, + "args": { + "External id": 290384,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367213146.187, "dur": 124.920, + "args": { + "External id": 290385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367213284.569, "dur": 40.948, + "args": { + "External id": 290386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367213331.180, "dur": 31.635, + "args": { + "External id": 290387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367213390.167, "dur": 29.095, + "args": { + "External id": 290388,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367213437.245, "dur": 17.637, + "args": { + "External id": 290389,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367213471.513, "dur": 133.684, + "args": { + "External id": 290390,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367213522.684, "dur": 11.719, + "args": { + "External id": 290391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367213528.305, "dur": 5.319, + "args": { + "External id": 290392,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367213537.300, "dur": 5.401, + "args": { + "External id": 290393,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367213544.101, "dur": 3.585, + "args": { + "External id": 290394,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367213550.040, "dur": 5.255, + "args": { + "External id": 290395,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367213616.344, "dur": 93.654, + "args": { + "External id": 290396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367213744.189, "dur": 30.220, + "args": { + "External id": 290397,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367213784.021, "dur": 44.338, + "args": { + "External id": 290398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367213836.811, "dur": 35.001, + "args": { + "External id": 290399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367213896.839, "dur": 26.782, + "args": { + "External id": 290400,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367213928.696, "dur": 34.284, + "args": { + "External id": 290401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367213980.417, "dur": 20.985, + "args": { + "External id": 290402,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2070554, "tid": 2070554, + "ts": 5333367214147.703, "dur": 97.098, + "args": { + "External id": 290403,"Record function id": 0, "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367214320.350, "dur": 47.689, + "args": { + "External id": 290404,"Record function id": 0, "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2070554, "tid": 2070554, + "ts": 5333367214376.673, "dur": 18287.516, + "args": { + "External id": 290405,"Record function id": 0, "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070554, "tid": 2070554, + "ts": 5333367214385.438, "dur": 889.891, + "args": { + "External id": 290406,"Record function id": 0, "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367214467.872, "dur": 9.315, + "args": { + "External id": 290407,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367214490.733, "dur": 39.965, + "args": { + "External id": 290408,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214496.495, "dur": 2.406, + "args": { + "External id": 290409,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214503.892, "dur": 0.389, + "args": { + "External id": 290410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214505.929, "dur": 0.240, + "args": { + "External id": 290411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214507.679, "dur": 0.672, + "args": { + "External id": 290412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214512.370, "dur": 0.176, + "args": { + "External id": 290413,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214514.063, "dur": 0.634, + "args": { + "External id": 290414,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214516.363, "dur": 3.300, + "args": { + "External id": 290415,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214521.159, "dur": 0.377, + "args": { + "External id": 290416,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214522.931, "dur": 0.369, + "args": { + "External id": 290417,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367214542.015, "dur": 52.417, + "args": { + "External id": 290418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367214666.319, "dur": 134.637, + "args": { + "External id": 290419,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367214680.913, "dur": 5.846, + "args": { + "External id": 290420,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367214692.150, "dur": 11.582, + "args": { + "External id": 290421,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367214696.691, "dur": 6.591, + "args": { + "External id": 290422,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214700.661, "dur": 0.783, + "args": { + "External id": 290423,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367214711.824, "dur": 35.123, + "args": { + "External id": 290424,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214714.383, "dur": 3.373, + "args": { + "External id": 290425,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214719.302, "dur": 0.509, + "args": { + "External id": 290426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214721.150, "dur": 0.576, + "args": { + "External id": 290427,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214725.986, "dur": 1.652, + "args": { + "External id": 290428,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214729.130, "dur": 0.159, + "args": { + "External id": 290429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214730.836, "dur": 0.400, + "args": { + "External id": 290430,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214735.392, "dur": 0.181, + "args": { + "External id": 290431,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214737.003, "dur": 0.326, + "args": { + "External id": 290432,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367214738.900, "dur": 2.336, + "args": { + "External id": 290433,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367214762.887, "dur": 29.398, + "args": { + "External id": 290434,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367214858.227, "dur": 301.118, + "args": { + "External id": 290435,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367214892.950, "dur": 262.192, + "args": { + "External id": 290436,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5968, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367214908.597, "dur": 241.058, + "args": { + "External id": 290437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367215200.624, "dur": 3.415, + "args": { + "External id": 290438,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5970, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070554, "tid": 2070554, + "ts": 5333367215297.915, "dur": 17135.853, + "args": { + "External id": 290439,"Record function id": 0, "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215396.292, "dur": 6.318, + "args": { + "External id": 290440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215405.825, "dur": 0.834, + "args": { + "External id": 290441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215408.349, "dur": 2.953, + "args": { + "External id": 290442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215413.084, "dur": 1.123, + "args": { + "External id": 290443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215415.485, "dur": 0.973, + "args": { + "External id": 290444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215417.770, "dur": 0.802, + "args": { + "External id": 290445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215422.210, "dur": 0.764, + "args": { + "External id": 290446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215424.596, "dur": 2.992, + "args": { + "External id": 290447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215428.949, "dur": 0.926, + "args": { + "External id": 290448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367215431.236, "dur": 0.954, + "args": { + "External id": 290449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367215452.699, "dur": 16934.682, + "args": { + "External id": 290450,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367215469.640, "dur": 16909.402, + "args": { + "External id": 290451,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367215493.103, "dur": 15.420, + "args": { + "External id": 290452,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367215511.954, "dur": 16829.743, + "args": { + "External id": 290453,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367215514.358, "dur": 16826.656, + "args": { + "External id": 290454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367215520.805, "dur": 7.216, + "args": { + "External id": 290455,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367215529.883, "dur": 16807.867, + "args": { + "External id": 290456,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367232577.258, "dur": 32.130, + "args": { + "External id": 290457,"Sequence number": 1209187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5989 + } + }, + { + "ph": "s", "id": 45, "pid": 2070554, "tid": 2070554, "ts": 5333367232577.258, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367232595.023, "dur": 9.612, + "args": { + "External id": 290458,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367232599.507, "dur": 4.893, + "args": { + "External id": 290459,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367232707.537, "dur": 99.129, + "args": { + "External id": 290460,"Record function id": 0, "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367232808.546, "dur": 1099.125, + "args": { + "External id": 290461,"Record function id": 0, "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367232851.474, "dur": 1042.538, + "args": { + "External id": 290462,"Sequence number": 1209188, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5994 + } + }, + { + "ph": "s", "id": 44, "pid": 2070554, "tid": 2070554, "ts": 5333367232851.474, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367232921.519, "dur": 49.223, + "args": { + "External id": 290463,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367232984.042, "dur": 106.329, + "args": { + "External id": 290464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367233099.038, "dur": 39.185, + "args": { + "External id": 290465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367233146.136, "dur": 49.362, + "args": { + "External id": 290466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367233225.377, "dur": 31.106, + "args": { + "External id": 290467,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367233274.316, "dur": 15.104, + "args": { + "External id": 290468,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367233310.015, "dur": 129.419, + "args": { + "External id": 290469,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367233359.496, "dur": 11.096, + "args": { + "External id": 290470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367233365.014, "dur": 4.737, + "args": { + "External id": 290471,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367233373.294, "dur": 5.916, + "args": { + "External id": 290472,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367233380.582, "dur": 0.841, + "args": { + "External id": 290473,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367233384.020, "dur": 4.703, + "args": { + "External id": 290474,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367233449.567, "dur": 49.736, + "args": { + "External id": 290475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367233526.578, "dur": 29.048, + "args": { + "External id": 290476,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367233564.214, "dur": 41.765, + "args": { + "External id": 290477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367233613.960, "dur": 72.862, + "args": { + "External id": 290478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367233713.995, "dur": 26.995, + "args": { + "External id": 290479,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367233747.005, "dur": 36.870, + "args": { + "External id": 290480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367233803.259, "dur": 19.694, + "args": { + "External id": 290481,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6013 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2070554, "tid": 2070554, + "ts": 5333367233972.876, "dur": 76.298, + "args": { + "External id": 290482,"Record function id": 0, "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367234120.960, "dur": 44.918, + "args": { + "External id": 290483,"Record function id": 0, "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2070554, "tid": 2070554, + "ts": 5333367234193.462, "dur": 18211.381, + "args": { + "External id": 290484,"Record function id": 0, "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070554, "tid": 2070554, + "ts": 5333367234204.152, "dur": 836.360, + "args": { + "External id": 290485,"Record function id": 0, "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367234286.276, "dur": 9.106, + "args": { + "External id": 290486,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367234309.349, "dur": 40.429, + "args": { + "External id": 290487,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234315.161, "dur": 2.291, + "args": { + "External id": 290488,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234322.666, "dur": 0.694, + "args": { + "External id": 290489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234324.852, "dur": 0.256, + "args": { + "External id": 290490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234326.960, "dur": 0.230, + "args": { + "External id": 290491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234330.783, "dur": 0.460, + "args": { + "External id": 290492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234333.253, "dur": 0.299, + "args": { + "External id": 290493,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234335.059, "dur": 3.600, + "args": { + "External id": 290494,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234340.280, "dur": 0.175, + "args": { + "External id": 290495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234341.919, "dur": 0.338, + "args": { + "External id": 290496,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367234362.061, "dur": 43.673, + "args": { + "External id": 290497,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367234439.992, "dur": 121.798, + "args": { + "External id": 290498,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367234452.262, "dur": 4.104, + "args": { + "External id": 290499,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367234461.386, "dur": 10.625, + "args": { + "External id": 290500,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367234466.213, "dur": 5.347, + "args": { + "External id": 290501,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234469.912, "dur": 0.378, + "args": { + "External id": 290502,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367234479.525, "dur": 35.397, + "args": { + "External id": 290503,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234481.800, "dur": 2.576, + "args": { + "External id": 290504,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234486.528, "dur": 0.494, + "args": { + "External id": 290505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234488.599, "dur": 0.465, + "args": { + "External id": 290506,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234493.284, "dur": 2.111, + "args": { + "External id": 290507,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234497.308, "dur": 0.536, + "args": { + "External id": 290508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234499.552, "dur": 0.170, + "args": { + "External id": 290509,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234503.486, "dur": 0.298, + "args": { + "External id": 290510,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234505.125, "dur": 0.362, + "args": { + "External id": 290511,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367234507.102, "dur": 2.127, + "args": { + "External id": 290512,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367234526.567, "dur": 26.688, + "args": { + "External id": 290513,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367234614.015, "dur": 331.337, + "args": { + "External id": 290514,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367234684.865, "dur": 255.825, + "args": { + "External id": 290515,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6047, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367234697.028, "dur": 238.581, + "args": { + "External id": 290516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367234967.497, "dur": 2.156, + "args": { + "External id": 290517,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6049, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070554, "tid": 2070554, + "ts": 5333367235062.067, "dur": 17121.441, + "args": { + "External id": 290518,"Record function id": 0, "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235158.424, "dur": 6.287, + "args": { + "External id": 290519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235186.209, "dur": 1.560, + "args": { + "External id": 290520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235189.876, "dur": 2.245, + "args": { + "External id": 290521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235194.147, "dur": 0.946, + "args": { + "External id": 290522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235196.311, "dur": 0.836, + "args": { + "External id": 290523,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235198.432, "dur": 1.083, + "args": { + "External id": 290524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235203.105, "dur": 0.832, + "args": { + "External id": 290525,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235205.545, "dur": 2.635, + "args": { + "External id": 290526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235209.902, "dur": 0.702, + "args": { + "External id": 290527,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367235211.974, "dur": 0.561, + "args": { + "External id": 290528,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367235235.127, "dur": 16892.723, + "args": { + "External id": 290529,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367235251.328, "dur": 16868.365, + "args": { + "External id": 290530,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367235276.950, "dur": 15.151, + "args": { + "External id": 290531,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367235295.491, "dur": 16787.783, + "args": { + "External id": 290532,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367235298.105, "dur": 16784.568, + "args": { + "External id": 290533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367235304.977, "dur": 5.372, + "args": { + "External id": 290534,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367235312.008, "dur": 16767.303, + "args": { + "External id": 290535,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367252342.132, "dur": 36.777, + "args": { + "External id": 290536,"Sequence number": 1209189, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6068 + } + }, + { + "ph": "s", "id": 43, "pid": 2070554, "tid": 2070554, "ts": 5333367252342.132, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367252364.314, "dur": 9.485, + "args": { + "External id": 290537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367252368.289, "dur": 5.099, + "args": { + "External id": 290538,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367252445.715, "dur": 96.463, + "args": { + "External id": 290539,"Record function id": 0, "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367252543.861, "dur": 1132.593, + "args": { + "External id": 290540,"Record function id": 0, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367252585.186, "dur": 1076.911, + "args": { + "External id": 290541,"Sequence number": 1209190, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6073 + } + }, + { + "ph": "s", "id": 42, "pid": 2070554, "tid": 2070554, "ts": 5333367252585.186, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367252686.557, "dur": 48.641, + "args": { + "External id": 290542,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367252748.203, "dur": 107.231, + "args": { + "External id": 290543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367252868.936, "dur": 40.345, + "args": { + "External id": 290544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367252915.170, "dur": 31.715, + "args": { + "External id": 290545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367252974.879, "dur": 26.596, + "args": { + "External id": 290546,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367253017.937, "dur": 16.716, + "args": { + "External id": 290547,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367253053.674, "dur": 152.010, + "args": { + "External id": 290548,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367253106.080, "dur": 11.450, + "args": { + "External id": 290549,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367253111.538, "dur": 5.249, + "args": { + "External id": 290550,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367253120.503, "dur": 5.068, + "args": { + "External id": 290551,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367253127.021, "dur": 0.903, + "args": { + "External id": 290552,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367253132.789, "dur": 3.864, + "args": { + "External id": 290553,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367253217.948, "dur": 52.090, + "args": { + "External id": 290554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367253301.694, "dur": 30.055, + "args": { + "External id": 290555,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367253340.856, "dur": 42.823, + "args": { + "External id": 290556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367253389.600, "dur": 35.147, + "args": { + "External id": 290557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367253448.186, "dur": 29.150, + "args": { + "External id": 290558,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367253482.677, "dur": 34.634, + "args": { + "External id": 290559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367253536.095, "dur": 20.146, + "args": { + "External id": 290560,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2070554, "tid": 2070554, + "ts": 5333367253744.097, "dur": 76.723, + "args": { + "External id": 290561,"Record function id": 0, "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367253892.782, "dur": 46.077, + "args": { + "External id": 290562,"Record function id": 0, "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2070554, "tid": 2070554, + "ts": 5333367253948.044, "dur": 18253.934, + "args": { + "External id": 290563,"Record function id": 0, "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070554, "tid": 2070554, + "ts": 5333367253956.975, "dur": 874.083, + "args": { + "External id": 290564,"Record function id": 0, "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367254039.274, "dur": 9.420, + "args": { + "External id": 290565,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367254063.105, "dur": 38.637, + "args": { + "External id": 290566,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254068.353, "dur": 2.274, + "args": { + "External id": 290567,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254076.053, "dur": 0.212, + "args": { + "External id": 290568,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254078.103, "dur": 0.591, + "args": { + "External id": 290569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254080.020, "dur": 0.473, + "args": { + "External id": 290570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254084.163, "dur": 0.467, + "args": { + "External id": 290571,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254086.096, "dur": 0.630, + "args": { + "External id": 290572,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254088.052, "dur": 3.607, + "args": { + "External id": 290573,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254093.023, "dur": 0.209, + "args": { + "External id": 290574,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254094.680, "dur": 0.163, + "args": { + "External id": 290575,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367254112.928, "dur": 47.366, + "args": { + "External id": 290576,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367254213.698, "dur": 132.282, + "args": { + "External id": 290577,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367254227.235, "dur": 5.098, + "args": { + "External id": 290578,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367254237.471, "dur": 11.274, + "args": { + "External id": 290579,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367254241.943, "dur": 6.360, + "args": { + "External id": 290580,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254246.019, "dur": 0.703, + "args": { + "External id": 290581,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367254256.199, "dur": 38.533, + "args": { + "External id": 290582,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254259.061, "dur": 3.178, + "args": { + "External id": 290583,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254263.926, "dur": 0.203, + "args": { + "External id": 290584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254271.202, "dur": 0.479, + "args": { + "External id": 290585,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254275.412, "dur": 1.693, + "args": { + "External id": 290586,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254278.616, "dur": 0.547, + "args": { + "External id": 290587,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254280.602, "dur": 3.004, + "args": { + "External id": 290588,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254284.832, "dur": 0.169, + "args": { + "External id": 290589,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254286.328, "dur": 0.163, + "args": { + "External id": 290590,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367254289.883, "dur": 0.171, + "args": { + "External id": 290591,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367254309.128, "dur": 28.817, + "args": { + "External id": 290592,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367254400.163, "dur": 335.287, + "args": { + "External id": 290593,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367254434.799, "dur": 294.946, + "args": { + "External id": 290594,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6126, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367254445.715, "dur": 277.947, + "args": { + "External id": 290595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367254760.482, "dur": 2.613, + "args": { + "External id": 290596,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6128, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070554, "tid": 2070554, + "ts": 5333367254854.024, "dur": 17134.692, + "args": { + "External id": 290597,"Record function id": 0, "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254951.208, "dur": 6.077, + "args": { + "External id": 290598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254960.754, "dur": 1.030, + "args": { + "External id": 290599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254963.704, "dur": 1.849, + "args": { + "External id": 290600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254967.075, "dur": 1.066, + "args": { + "External id": 290601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254969.424, "dur": 0.824, + "args": { + "External id": 290602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254971.581, "dur": 0.739, + "args": { + "External id": 290603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254982.315, "dur": 0.919, + "args": { + "External id": 290604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254985.067, "dur": 1.627, + "args": { + "External id": 290605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254988.115, "dur": 0.791, + "args": { + "External id": 290606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367254990.457, "dur": 0.525, + "args": { + "External id": 290607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367255015.354, "dur": 16928.186, + "args": { + "External id": 290608,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367255031.457, "dur": 16903.870, + "args": { + "External id": 290609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367255051.102, "dur": 14.403, + "args": { + "External id": 290610,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367255069.059, "dur": 16829.609, + "args": { + "External id": 290611,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367255071.581, "dur": 16826.358, + "args": { + "External id": 290612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367255077.953, "dur": 9.407, + "args": { + "External id": 290613,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367255089.116, "dur": 16805.642, + "args": { + "External id": 290614,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367272128.412, "dur": 34.157, + "args": { + "External id": 290615,"Sequence number": 1209191, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6147 + } + }, + { + "ph": "s", "id": 41, "pid": 2070554, "tid": 2070554, "ts": 5333367272128.412, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367272148.649, "dur": 9.348, + "args": { + "External id": 290616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367272152.730, "dur": 5.042, + "args": { + "External id": 290617,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367272242.265, "dur": 98.020, + "args": { + "External id": 290618,"Record function id": 0, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367272342.205, "dur": 1112.392, + "args": { + "External id": 290619,"Record function id": 0, "Ev Idx": 6151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367272386.684, "dur": 1054.432, + "args": { + "External id": 290620,"Sequence number": 1209192, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6152 + } + }, + { + "ph": "s", "id": 40, "pid": 2070554, "tid": 2070554, "ts": 5333367272386.684, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367272458.842, "dur": 49.325, + "args": { + "External id": 290621,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367272521.726, "dur": 138.324, + "args": { + "External id": 290622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367272673.403, "dur": 46.380, + "args": { + "External id": 290623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367272728.976, "dur": 32.363, + "args": { + "External id": 290624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367272791.006, "dur": 31.529, + "args": { + "External id": 290625,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367272841.040, "dur": 15.080, + "args": { + "External id": 290626,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367272875.842, "dur": 129.611, + "args": { + "External id": 290627,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367272925.120, "dur": 11.714, + "args": { + "External id": 290628,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367272930.873, "dur": 5.161, + "args": { + "External id": 290629,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367272939.965, "dur": 5.598, + "args": { + "External id": 290630,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367272947.091, "dur": 0.948, + "args": { + "External id": 290631,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367272950.418, "dur": 4.243, + "args": { + "External id": 290632,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367273016.133, "dur": 48.131, + "args": { + "External id": 290633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367273092.853, "dur": 28.075, + "args": { + "External id": 290634,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367273128.961, "dur": 58.191, + "args": { + "External id": 290635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367273199.483, "dur": 40.489, + "args": { + "External id": 290636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367273263.137, "dur": 26.221, + "args": { + "External id": 290637,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367273295.175, "dur": 34.665, + "args": { + "External id": 290638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367273349.605, "dur": 19.273, + "args": { + "External id": 290639,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2070554, "tid": 2070554, + "ts": 5333367273521.774, "dur": 76.581, + "args": { + "External id": 290640,"Record function id": 0, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367273713.050, "dur": 50.590, + "args": { + "External id": 290641,"Record function id": 0, "Ev Idx": 6173 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2070554, "tid": 2070554, + "ts": 5333367273773.604, "dur": 18197.405, + "args": { + "External id": 290642,"Record function id": 0, "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070554, "tid": 2070554, + "ts": 5333367273782.083, "dur": 819.380, + "args": { + "External id": 290643,"Record function id": 0, "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367273864.408, "dur": 9.382, + "args": { + "External id": 290644,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367273888.019, "dur": 39.319, + "args": { + "External id": 290645,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273893.630, "dur": 2.200, + "args": { + "External id": 290646,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273901.333, "dur": 0.186, + "args": { + "External id": 290647,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273902.943, "dur": 0.519, + "args": { + "External id": 290648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273905.171, "dur": 0.339, + "args": { + "External id": 290649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273909.338, "dur": 0.377, + "args": { + "External id": 290650,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273910.948, "dur": 0.520, + "args": { + "External id": 290651,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273913.470, "dur": 4.013, + "args": { + "External id": 290652,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273918.734, "dur": 0.259, + "args": { + "External id": 290653,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367273920.259, "dur": 0.318, + "args": { + "External id": 290654,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367273939.913, "dur": 45.581, + "args": { + "External id": 290655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367274019.681, "dur": 117.465, + "args": { + "External id": 290656,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367274031.220, "dur": 4.012, + "args": { + "External id": 290657,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367274040.070, "dur": 11.219, + "args": { + "External id": 290658,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367274045.033, "dur": 5.796, + "args": { + "External id": 290659,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274048.737, "dur": 0.884, + "args": { + "External id": 290660,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367274058.521, "dur": 32.892, + "args": { + "External id": 290661,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274060.865, "dur": 3.016, + "args": { + "External id": 290662,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274065.492, "dur": 0.421, + "args": { + "External id": 290663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274067.798, "dur": 0.208, + "args": { + "External id": 290664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274072.097, "dur": 1.765, + "args": { + "External id": 290665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274075.202, "dur": 0.361, + "args": { + "External id": 290666,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274077.398, "dur": 0.351, + "args": { + "External id": 290667,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274081.513, "dur": 0.579, + "args": { + "External id": 290668,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274083.631, "dur": 0.209, + "args": { + "External id": 290669,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274085.048, "dur": 2.285, + "args": { + "External id": 290670,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367274102.523, "dur": 25.957, + "args": { + "External id": 290671,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367274211.150, "dur": 300.365, + "args": { + "External id": 290672,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367274245.772, "dur": 261.188, + "args": { + "External id": 290673,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6205, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367274257.576, "dur": 244.027, + "args": { + "External id": 290674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367274533.900, "dur": 2.198, + "args": { + "External id": 290675,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6207, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070554, "tid": 2070554, + "ts": 5333367274661.232, "dur": 17111.979, + "args": { + "External id": 290676,"Record function id": 0, "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274761.276, "dur": 7.044, + "args": { + "External id": 290677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274771.907, "dur": 1.134, + "args": { + "External id": 290678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274774.784, "dur": 2.381, + "args": { + "External id": 290679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274779.285, "dur": 0.896, + "args": { + "External id": 290680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274781.829, "dur": 0.789, + "args": { + "External id": 290681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274783.774, "dur": 0.507, + "args": { + "External id": 290682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274787.727, "dur": 1.105, + "args": { + "External id": 290683,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274790.440, "dur": 1.838, + "args": { + "External id": 290684,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274794.063, "dur": 0.573, + "args": { + "External id": 290685,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367274796.151, "dur": 0.627, + "args": { + "External id": 290686,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367274829.254, "dur": 16900.144, + "args": { + "External id": 290687,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367274845.549, "dur": 16876.381, + "args": { + "External id": 290688,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367274868.162, "dur": 15.806, + "args": { + "External id": 290689,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367274887.540, "dur": 16797.991, + "args": { + "External id": 290690,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367274890.083, "dur": 16794.834, + "args": { + "External id": 290691,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367274896.435, "dur": 5.786, + "args": { + "External id": 290692,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367274903.943, "dur": 16777.443, + "args": { + "External id": 290693,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367291913.830, "dur": 31.543, + "args": { + "External id": 290694,"Sequence number": 1209193, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6226 + } + }, + { + "ph": "s", "id": 39, "pid": 2070554, "tid": 2070554, "ts": 5333367291913.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367291931.895, "dur": 8.852, + "args": { + "External id": 290695,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367291935.608, "dur": 4.949, + "args": { + "External id": 290696,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367292009.911, "dur": 93.939, + "args": { + "External id": 290697,"Record function id": 0, "Ev Idx": 6229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367292105.789, "dur": 1118.301, + "args": { + "External id": 290698,"Record function id": 0, "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367292147.520, "dur": 1062.851, + "args": { + "External id": 290699,"Sequence number": 1209194, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6231 + } + }, + { + "ph": "s", "id": 38, "pid": 2070554, "tid": 2070554, "ts": 5333367292147.520, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367292232.509, "dur": 50.330, + "args": { + "External id": 290700,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367292297.099, "dur": 108.355, + "args": { + "External id": 290701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367292415.593, "dur": 39.922, + "args": { + "External id": 290702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367292464.314, "dur": 31.131, + "args": { + "External id": 290703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367292520.110, "dur": 27.365, + "args": { + "External id": 290704,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367292565.265, "dur": 16.297, + "args": { + "External id": 290705,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367292599.930, "dur": 174.037, + "args": { + "External id": 290706,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367292689.548, "dur": 13.672, + "args": { + "External id": 290707,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367292695.397, "dur": 6.494, + "args": { + "External id": 290708,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367292705.903, "dur": 5.401, + "args": { + "External id": 290709,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367292712.924, "dur": 1.372, + "args": { + "External id": 290710,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367292716.600, "dur": 4.346, + "args": { + "External id": 290711,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367292784.709, "dur": 50.975, + "args": { + "External id": 290712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367292866.101, "dur": 28.523, + "args": { + "External id": 290713,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367292903.533, "dur": 41.932, + "args": { + "External id": 290714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367292955.176, "dur": 35.330, + "args": { + "External id": 290715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367293014.285, "dur": 26.544, + "args": { + "External id": 290716,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367293046.341, "dur": 33.632, + "args": { + "External id": 290717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367293101.110, "dur": 19.013, + "args": { + "External id": 290718,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2070554, "tid": 2070554, + "ts": 5333367293290.922, "dur": 74.306, + "args": { + "External id": 290719,"Record function id": 0, "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367293436.282, "dur": 45.476, + "args": { + "External id": 290720,"Record function id": 0, "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2070554, "tid": 2070554, + "ts": 5333367293491.023, "dur": 18168.283, + "args": { + "External id": 290721,"Record function id": 0, "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070554, "tid": 2070554, + "ts": 5333367293499.032, "dur": 906.403, + "args": { + "External id": 290722,"Record function id": 0, "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367293579.534, "dur": 8.865, + "args": { + "External id": 290723,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367293601.758, "dur": 77.009, + "args": { + "External id": 290724,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293607.179, "dur": 2.174, + "args": { + "External id": 290725,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293614.578, "dur": 0.397, + "args": { + "External id": 290726,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293616.842, "dur": 0.261, + "args": { + "External id": 290727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293653.840, "dur": 0.764, + "args": { + "External id": 290728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293659.888, "dur": 0.300, + "args": { + "External id": 290729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293661.971, "dur": 0.494, + "args": { + "External id": 290730,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293664.364, "dur": 4.171, + "args": { + "External id": 290731,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293670.140, "dur": 0.417, + "args": { + "External id": 290732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293672.077, "dur": 0.306, + "args": { + "External id": 290733,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367293692.789, "dur": 46.381, + "args": { + "External id": 290734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367293775.990, "dur": 123.931, + "args": { + "External id": 290735,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367293787.936, "dur": 5.151, + "args": { + "External id": 290736,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367293798.383, "dur": 10.820, + "args": { + "External id": 290737,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367293803.129, "dur": 5.673, + "args": { + "External id": 290738,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293806.993, "dur": 0.539, + "args": { + "External id": 290739,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367293817.157, "dur": 32.824, + "args": { + "External id": 290740,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293819.619, "dur": 3.060, + "args": { + "External id": 290741,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293824.257, "dur": 0.436, + "args": { + "External id": 290742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293826.835, "dur": 0.409, + "args": { + "External id": 290743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293831.170, "dur": 1.269, + "args": { + "External id": 290744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293833.640, "dur": 0.277, + "args": { + "External id": 290745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293835.503, "dur": 0.434, + "args": { + "External id": 290746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293839.363, "dur": 0.381, + "args": { + "External id": 290747,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293841.337, "dur": 0.252, + "args": { + "External id": 290748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367293842.936, "dur": 3.215, + "args": { + "External id": 290749,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367293865.361, "dur": 26.952, + "args": { + "External id": 290750,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367293953.424, "dur": 352.889, + "args": { + "External id": 290751,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367293987.080, "dur": 314.171, + "args": { + "External id": 290752,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6284, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367293997.883, "dur": 297.191, + "args": { + "External id": 290753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367294332.317, "dur": 2.407, + "args": { + "External id": 290754,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6286, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070554, "tid": 2070554, + "ts": 5333367294427.598, "dur": 16995.010, + "args": { + "External id": 290755,"Record function id": 0, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294530.606, "dur": 6.549, + "args": { + "External id": 290756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294540.643, "dur": 0.734, + "args": { + "External id": 290757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294557.733, "dur": 1.737, + "args": { + "External id": 290758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294563.168, "dur": 0.785, + "args": { + "External id": 290759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294565.377, "dur": 0.716, + "args": { + "External id": 290760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294567.355, "dur": 0.508, + "args": { + "External id": 290761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294569.461, "dur": 0.741, + "args": { + "External id": 290762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294574.398, "dur": 1.832, + "args": { + "External id": 290763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294577.853, "dur": 0.997, + "args": { + "External id": 290764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367294580.823, "dur": 3.882, + "args": { + "External id": 290765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367294606.420, "dur": 16770.167, + "args": { + "External id": 290766,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367294660.604, "dur": 16707.925, + "args": { + "External id": 290767,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367294679.399, "dur": 17.048, + "args": { + "External id": 290768,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367294702.290, "dur": 16630.474, + "args": { + "External id": 290769,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367294704.943, "dur": 16627.293, + "args": { + "External id": 290770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367294710.983, "dur": 5.791, + "args": { + "External id": 290771,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367294718.592, "dur": 16610.254, + "args": { + "External id": 290772,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367311562.153, "dur": 32.645, + "args": { + "External id": 290773,"Sequence number": 1209195, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6305 + } + }, + { + "ph": "s", "id": 37, "pid": 2070554, "tid": 2070554, "ts": 5333367311562.153, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367311580.731, "dur": 9.258, + "args": { + "External id": 290774,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367311584.634, "dur": 5.126, + "args": { + "External id": 290775,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367311704.246, "dur": 96.028, + "args": { + "External id": 290776,"Record function id": 0, "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367311801.943, "dur": 1118.822, + "args": { + "External id": 290777,"Record function id": 0, "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367311843.947, "dur": 1063.608, + "args": { + "External id": 290778,"Sequence number": 1209196, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6310 + } + }, + { + "ph": "s", "id": 36, "pid": 2070554, "tid": 2070554, "ts": 5333367311843.947, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367311913.902, "dur": 50.850, + "args": { + "External id": 290779,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367311978.163, "dur": 108.418, + "args": { + "External id": 290780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367312096.515, "dur": 38.633, + "args": { + "External id": 290781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367312143.868, "dur": 47.827, + "args": { + "External id": 290782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367312222.624, "dur": 30.441, + "args": { + "External id": 290783,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367312271.473, "dur": 16.215, + "args": { + "External id": 290784,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367312307.124, "dur": 131.843, + "args": { + "External id": 290785,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367312359.061, "dur": 11.862, + "args": { + "External id": 290786,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367312364.675, "dur": 5.310, + "args": { + "External id": 290787,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367312373.836, "dur": 5.511, + "args": { + "External id": 290788,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367312380.800, "dur": 1.034, + "args": { + "External id": 290789,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367312384.229, "dur": 3.044, + "args": { + "External id": 290790,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367312449.394, "dur": 50.141, + "args": { + "External id": 290791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367312529.679, "dur": 29.569, + "args": { + "External id": 290792,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367312568.030, "dur": 42.197, + "args": { + "External id": 290793,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367312651.098, "dur": 42.869, + "args": { + "External id": 290794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367312719.901, "dur": 28.284, + "args": { + "External id": 290795,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367312754.750, "dur": 35.145, + "args": { + "External id": 290796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367312814.212, "dur": 18.488, + "args": { + "External id": 290797,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2070554, "tid": 2070554, + "ts": 5333367312986.966, "dur": 76.584, + "args": { + "External id": 290798,"Record function id": 0, "Ev Idx": 6330 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367313135.764, "dur": 68.426, + "args": { + "External id": 290799,"Record function id": 0, "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2070554, "tid": 2070554, + "ts": 5333367313215.444, "dur": 18163.652, + "args": { + "External id": 290800,"Record function id": 0, "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070554, "tid": 2070554, + "ts": 5333367313225.041, "dur": 877.140, + "args": { + "External id": 290801,"Record function id": 0, "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367313308.526, "dur": 9.983, + "args": { + "External id": 290802,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367313332.213, "dur": 41.438, + "args": { + "External id": 290803,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313338.172, "dur": 2.332, + "args": { + "External id": 290804,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313345.516, "dur": 0.221, + "args": { + "External id": 290805,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313347.479, "dur": 0.394, + "args": { + "External id": 290806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313350.387, "dur": 0.173, + "args": { + "External id": 290807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313353.999, "dur": 0.528, + "args": { + "External id": 290808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313356.138, "dur": 0.478, + "args": { + "External id": 290809,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313358.307, "dur": 4.320, + "args": { + "External id": 290810,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313364.402, "dur": 0.584, + "args": { + "External id": 290811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313366.584, "dur": 0.410, + "args": { + "External id": 290812,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367313386.287, "dur": 47.309, + "args": { + "External id": 290813,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367313467.404, "dur": 146.762, + "args": { + "External id": 290814,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367313478.330, "dur": 4.971, + "args": { + "External id": 290815,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367313488.334, "dur": 39.776, + "args": { + "External id": 290816,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367313521.787, "dur": 5.832, + "args": { + "External id": 290817,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313525.734, "dur": 0.544, + "args": { + "External id": 290818,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367313535.368, "dur": 32.715, + "args": { + "External id": 290819,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313537.937, "dur": 2.561, + "args": { + "External id": 290820,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313541.819, "dur": 0.197, + "args": { + "External id": 290821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313543.284, "dur": 0.241, + "args": { + "External id": 290822,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313547.743, "dur": 1.148, + "args": { + "External id": 290823,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313550.487, "dur": 0.353, + "args": { + "External id": 290824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313552.243, "dur": 0.174, + "args": { + "External id": 290825,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313556.355, "dur": 0.517, + "args": { + "External id": 290826,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313558.347, "dur": 0.394, + "args": { + "External id": 290827,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367313560.197, "dur": 2.868, + "args": { + "External id": 290828,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367313578.767, "dur": 27.139, + "args": { + "External id": 290829,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367313709.810, "dur": 302.374, + "args": { + "External id": 290830,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367313743.366, "dur": 264.243, + "args": { + "External id": 290831,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6363, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367313758.019, "dur": 243.682, + "args": { + "External id": 290832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367314034.178, "dur": 2.287, + "args": { + "External id": 290833,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6365, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070554, "tid": 2070554, + "ts": 5333367314123.688, "dur": 17054.604, + "args": { + "External id": 290834,"Record function id": 0, "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314239.017, "dur": 7.118, + "args": { + "External id": 290835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314249.989, "dur": 0.924, + "args": { + "External id": 290836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314252.631, "dur": 1.839, + "args": { + "External id": 290837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314256.666, "dur": 4.365, + "args": { + "External id": 290838,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314263.062, "dur": 1.006, + "args": { + "External id": 290839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314265.247, "dur": 0.684, + "args": { + "External id": 290840,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314269.552, "dur": 0.860, + "args": { + "External id": 290841,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314272.377, "dur": 1.729, + "args": { + "External id": 290842,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314275.431, "dur": 0.596, + "args": { + "External id": 290843,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367314277.621, "dur": 0.862, + "args": { + "External id": 290844,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367314301.302, "dur": 16822.553, + "args": { + "External id": 290845,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367314320.961, "dur": 16794.426, + "args": { + "External id": 290846,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367314343.026, "dur": 14.355, + "args": { + "External id": 290847,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367314360.939, "dur": 16719.154, + "args": { + "External id": 290848,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367314363.306, "dur": 16716.118, + "args": { + "External id": 290849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367314369.603, "dur": 5.109, + "args": { + "External id": 290850,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367314376.623, "dur": 16699.163, + "args": { + "External id": 290851,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367331322.460, "dur": 31.847, + "args": { + "External id": 290852,"Sequence number": 1209197, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6384 + } + }, + { + "ph": "s", "id": 35, "pid": 2070554, "tid": 2070554, "ts": 5333367331322.460, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367331339.740, "dur": 9.284, + "args": { + "External id": 290853,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367331343.657, "dur": 5.110, + "args": { + "External id": 290854,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367331417.144, "dur": 94.275, + "args": { + "External id": 290855,"Record function id": 0, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367331513.274, "dur": 1160.604, + "args": { + "External id": 290856,"Record function id": 0, "Ev Idx": 6388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367331553.871, "dur": 1104.566, + "args": { + "External id": 290857,"Sequence number": 1209198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6389 + } + }, + { + "ph": "s", "id": 34, "pid": 2070554, "tid": 2070554, "ts": 5333367331553.871, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367331665.421, "dur": 49.619, + "args": { + "External id": 290858,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367331730.734, "dur": 109.327, + "args": { + "External id": 290859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367331850.108, "dur": 40.687, + "args": { + "External id": 290860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367331900.096, "dur": 31.860, + "args": { + "External id": 290861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367331959.674, "dur": 26.878, + "args": { + "External id": 290862,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367332004.865, "dur": 16.246, + "args": { + "External id": 290863,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367332041.074, "dur": 143.856, + "args": { + "External id": 290864,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367332089.214, "dur": 12.119, + "args": { + "External id": 290865,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367332094.968, "dur": 5.552, + "args": { + "External id": 290866,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367332104.239, "dur": 5.212, + "args": { + "External id": 290867,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367332110.698, "dur": 1.495, + "args": { + "External id": 290868,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367332114.638, "dur": 3.794, + "args": { + "External id": 290869,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367332197.892, "dur": 55.590, + "args": { + "External id": 290870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367332286.417, "dur": 30.334, + "args": { + "External id": 290871,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367332325.484, "dur": 44.196, + "args": { + "External id": 290872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367332379.424, "dur": 35.185, + "args": { + "External id": 290873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367332438.343, "dur": 29.602, + "args": { + "External id": 290874,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367332473.357, "dur": 34.042, + "args": { + "External id": 290875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367332527.576, "dur": 18.779, + "args": { + "External id": 290876,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2070554, "tid": 2070554, + "ts": 5333367332741.654, "dur": 78.770, + "args": { + "External id": 290877,"Record function id": 0, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367332892.046, "dur": 46.751, + "args": { + "External id": 290878,"Record function id": 0, "Ev Idx": 6410 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2070554, "tid": 2070554, + "ts": 5333367332947.481, "dur": 18198.350, + "args": { + "External id": 290879,"Record function id": 0, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070554, "tid": 2070554, + "ts": 5333367332957.425, "dur": 886.409, + "args": { + "External id": 290880,"Record function id": 0, "Ev Idx": 6412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367333040.851, "dur": 8.878, + "args": { + "External id": 290881,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367333063.036, "dur": 38.791, + "args": { + "External id": 290882,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333069.219, "dur": 2.134, + "args": { + "External id": 290883,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333076.692, "dur": 0.228, + "args": { + "External id": 290884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333078.561, "dur": 0.237, + "args": { + "External id": 290885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333080.441, "dur": 0.341, + "args": { + "External id": 290886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333084.339, "dur": 0.372, + "args": { + "External id": 290887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333086.323, "dur": 0.552, + "args": { + "External id": 290888,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333088.030, "dur": 3.250, + "args": { + "External id": 290889,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333093.151, "dur": 0.374, + "args": { + "External id": 290890,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333095.064, "dur": 0.361, + "args": { + "External id": 290891,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367333112.752, "dur": 44.465, + "args": { + "External id": 290892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367333210.182, "dur": 130.297, + "args": { + "External id": 290893,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367333222.237, "dur": 5.651, + "args": { + "External id": 290894,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367333233.386, "dur": 11.206, + "args": { + "External id": 290895,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367333237.944, "dur": 6.208, + "args": { + "External id": 290896,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333241.722, "dur": 0.723, + "args": { + "External id": 290897,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367333251.954, "dur": 32.316, + "args": { + "External id": 290898,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333254.718, "dur": 2.905, + "args": { + "External id": 290899,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333259.388, "dur": 0.217, + "args": { + "External id": 290900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333260.930, "dur": 0.176, + "args": { + "External id": 290901,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333265.627, "dur": 1.554, + "args": { + "External id": 290902,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333268.558, "dur": 0.158, + "args": { + "External id": 290903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333270.074, "dur": 0.169, + "args": { + "External id": 290904,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333273.870, "dur": 0.167, + "args": { + "External id": 290905,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333275.827, "dur": 0.168, + "args": { + "External id": 290906,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367333277.171, "dur": 2.729, + "args": { + "External id": 290907,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367333301.584, "dur": 30.441, + "args": { + "External id": 290908,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367333395.430, "dur": 347.750, + "args": { + "External id": 290909,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367333427.412, "dur": 310.566, + "args": { + "External id": 290910,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6442, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367333437.757, "dur": 291.790, + "args": { + "External id": 290911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367333769.225, "dur": 2.976, + "args": { + "External id": 290912,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6444, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070554, "tid": 2070554, + "ts": 5333367333866.500, "dur": 17080.232, + "args": { + "External id": 290913,"Record function id": 0, "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333968.275, "dur": 6.466, + "args": { + "External id": 290914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333978.170, "dur": 1.158, + "args": { + "External id": 290915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333980.923, "dur": 2.420, + "args": { + "External id": 290916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333984.984, "dur": 0.718, + "args": { + "External id": 290917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333987.070, "dur": 1.027, + "args": { + "External id": 290918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333989.411, "dur": 0.717, + "args": { + "External id": 290919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333993.796, "dur": 0.758, + "args": { + "External id": 290920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333996.126, "dur": 2.006, + "args": { + "External id": 290921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367333999.555, "dur": 0.861, + "args": { + "External id": 290922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367334001.917, "dur": 0.874, + "args": { + "External id": 290923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367334025.117, "dur": 16877.005, + "args": { + "External id": 290924,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367334041.605, "dur": 16852.650, + "args": { + "External id": 290925,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367334066.243, "dur": 15.719, + "args": { + "External id": 290926,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367334085.378, "dur": 16772.167, + "args": { + "External id": 290927,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367334087.704, "dur": 16769.133, + "args": { + "External id": 290928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367334096.415, "dur": 6.730, + "args": { + "External id": 290929,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367334104.867, "dur": 16748.697, + "args": { + "External id": 290930,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367351087.354, "dur": 33.013, + "args": { + "External id": 290931,"Sequence number": 1209199, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6463 + } + }, + { + "ph": "s", "id": 33, "pid": 2070554, "tid": 2070554, "ts": 5333367351087.354, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367351106.772, "dur": 8.958, + "args": { + "External id": 290932,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367351110.580, "dur": 4.912, + "args": { + "External id": 290933,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367351198.702, "dur": 93.995, + "args": { + "External id": 290934,"Record function id": 0, "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367351294.733, "dur": 1123.868, + "args": { + "External id": 290935,"Record function id": 0, "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367351338.290, "dur": 1067.166, + "args": { + "External id": 290936,"Sequence number": 1209200, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6468 + } + }, + { + "ph": "s", "id": 32, "pid": 2070554, "tid": 2070554, "ts": 5333367351338.290, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367351410.434, "dur": 49.291, + "args": { + "External id": 290937,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367351473.091, "dur": 111.219, + "args": { + "External id": 290938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367351594.296, "dur": 79.518, + "args": { + "External id": 290939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367351687.285, "dur": 35.792, + "args": { + "External id": 290940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367351750.059, "dur": 29.078, + "args": { + "External id": 290941,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367351796.953, "dur": 15.895, + "args": { + "External id": 290942,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367351831.844, "dur": 130.805, + "args": { + "External id": 290943,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367351882.377, "dur": 11.892, + "args": { + "External id": 290944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367351887.756, "dur": 5.550, + "args": { + "External id": 290945,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367351897.003, "dur": 5.002, + "args": { + "External id": 290946,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367351903.356, "dur": 1.231, + "args": { + "External id": 290947,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367351907.111, "dur": 4.572, + "args": { + "External id": 290948,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367351972.627, "dur": 48.116, + "args": { + "External id": 290949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367352049.198, "dur": 30.082, + "args": { + "External id": 290950,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367352087.847, "dur": 42.029, + "args": { + "External id": 290951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367352138.622, "dur": 50.954, + "args": { + "External id": 290952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367352216.601, "dur": 26.735, + "args": { + "External id": 290953,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367352249.405, "dur": 38.885, + "args": { + "External id": 290954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367352309.789, "dur": 19.585, + "args": { + "External id": 290955,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2070554, "tid": 2070554, + "ts": 5333367352483.851, "dur": 73.804, + "args": { + "External id": 290956,"Record function id": 0, "Ev Idx": 6488 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367352669.146, "dur": 51.718, + "args": { + "External id": 290957,"Record function id": 0, "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2070554, "tid": 2070554, + "ts": 5333367352731.196, "dur": 18133.743, + "args": { + "External id": 290958,"Record function id": 0, "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070554, "tid": 2070554, + "ts": 5333367352739.450, "dur": 821.376, + "args": { + "External id": 290959,"Record function id": 0, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367352822.648, "dur": 9.342, + "args": { + "External id": 290960,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367352845.869, "dur": 40.378, + "args": { + "External id": 290961,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352852.309, "dur": 2.380, + "args": { + "External id": 290962,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352860.042, "dur": 0.337, + "args": { + "External id": 290963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352862.023, "dur": 0.368, + "args": { + "External id": 290964,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352864.115, "dur": 0.509, + "args": { + "External id": 290965,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352868.355, "dur": 0.366, + "args": { + "External id": 290966,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352870.230, "dur": 0.249, + "args": { + "External id": 290967,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352871.895, "dur": 4.070, + "args": { + "External id": 290968,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352877.479, "dur": 0.361, + "args": { + "External id": 290969,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367352879.690, "dur": 0.275, + "args": { + "External id": 290970,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367352898.169, "dur": 46.762, + "args": { + "External id": 290971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367352978.781, "dur": 123.737, + "args": { + "External id": 290972,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367352989.978, "dur": 4.389, + "args": { + "External id": 290973,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367352999.310, "dur": 10.617, + "args": { + "External id": 290974,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367353003.836, "dur": 5.686, + "args": { + "External id": 290975,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353007.402, "dur": 0.587, + "args": { + "External id": 290976,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367353017.234, "dur": 36.301, + "args": { + "External id": 290977,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353023.838, "dur": 2.756, + "args": { + "External id": 290978,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353027.992, "dur": 0.430, + "args": { + "External id": 290979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353029.977, "dur": 0.214, + "args": { + "External id": 290980,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353033.678, "dur": 1.601, + "args": { + "External id": 290981,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353036.932, "dur": 0.149, + "args": { + "External id": 290982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353038.337, "dur": 0.344, + "args": { + "External id": 290983,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353042.531, "dur": 0.169, + "args": { + "External id": 290984,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353044.239, "dur": 0.335, + "args": { + "External id": 290985,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353046.087, "dur": 2.741, + "args": { + "External id": 290986,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367353067.219, "dur": 27.470, + "args": { + "External id": 290987,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367353156.386, "dur": 312.850, + "args": { + "External id": 290988,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367353202.850, "dur": 261.229, + "args": { + "External id": 290989,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6521, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367353214.552, "dur": 244.088, + "args": { + "External id": 290990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367353492.158, "dur": 2.293, + "args": { + "External id": 290991,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6523, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070554, "tid": 2070554, + "ts": 5333367353582.271, "dur": 17080.825, + "args": { + "External id": 290992,"Record function id": 0, "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353718.637, "dur": 6.845, + "args": { + "External id": 290993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353729.149, "dur": 1.028, + "args": { + "External id": 290994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353731.705, "dur": 2.720, + "args": { + "External id": 290995,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353736.273, "dur": 1.211, + "args": { + "External id": 290996,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353738.979, "dur": 0.589, + "args": { + "External id": 290997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353740.761, "dur": 1.211, + "args": { + "External id": 290998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353745.678, "dur": 0.900, + "args": { + "External id": 290999,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353748.265, "dur": 2.049, + "args": { + "External id": 291000,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353751.739, "dur": 0.839, + "args": { + "External id": 291001,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367353754.080, "dur": 0.687, + "args": { + "External id": 291002,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367353776.726, "dur": 16819.621, + "args": { + "External id": 291003,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367353795.704, "dur": 16792.788, + "args": { + "External id": 291004,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367353818.043, "dur": 15.832, + "args": { + "External id": 291005,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367353837.402, "dur": 16715.724, + "args": { + "External id": 291006,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367353840.052, "dur": 16712.522, + "args": { + "External id": 291007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367353846.329, "dur": 9.451, + "args": { + "External id": 291008,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367353857.470, "dur": 16691.592, + "args": { + "External id": 291009,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367370806.805, "dur": 33.000, + "args": { + "External id": 291010,"Sequence number": 1209201, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6542 + } + }, + { + "ph": "s", "id": 31, "pid": 2070554, "tid": 2070554, "ts": 5333367370806.805, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367370825.295, "dur": 9.607, + "args": { + "External id": 291011,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367370829.258, "dur": 5.383, + "args": { + "External id": 291012,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367370903.810, "dur": 94.755, + "args": { + "External id": 291013,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367371000.844, "dur": 1119.585, + "args": { + "External id": 291014,"Record function id": 0, "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367371043.809, "dur": 1062.997, + "args": { + "External id": 291015,"Sequence number": 1209202, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6547 + } + }, + { + "ph": "s", "id": 30, "pid": 2070554, "tid": 2070554, "ts": 5333367371043.809, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367371109.965, "dur": 45.390, + "args": { + "External id": 291016,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367371183.890, "dur": 108.742, + "args": { + "External id": 291017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367371305.324, "dur": 39.830, + "args": { + "External id": 291018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367371354.427, "dur": 31.660, + "args": { + "External id": 291019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367371414.229, "dur": 29.691, + "args": { + "External id": 291020,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367371471.256, "dur": 17.710, + "args": { + "External id": 291021,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367371507.215, "dur": 169.837, + "args": { + "External id": 291022,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367371558.143, "dur": 12.003, + "args": { + "External id": 291023,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367371564.176, "dur": 5.268, + "args": { + "External id": 291024,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367371573.099, "dur": 4.165, + "args": { + "External id": 291025,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367371578.406, "dur": 1.245, + "args": { + "External id": 291026,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367371582.372, "dur": 3.762, + "args": { + "External id": 291027,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367371689.516, "dur": 55.444, + "args": { + "External id": 291028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367371778.231, "dur": 30.821, + "args": { + "External id": 291029,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367371818.253, "dur": 43.154, + "args": { + "External id": 291030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367371870.825, "dur": 35.926, + "args": { + "External id": 291031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367371929.223, "dur": 26.709, + "args": { + "External id": 291032,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367371960.987, "dur": 34.689, + "args": { + "External id": 291033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367372015.986, "dur": 18.841, + "args": { + "External id": 291034,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2070554, "tid": 2070554, + "ts": 5333367372202.784, "dur": 78.628, + "args": { + "External id": 291035,"Record function id": 0, "Ev Idx": 6567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367372355.816, "dur": 48.626, + "args": { + "External id": 291036,"Record function id": 0, "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2070554, "tid": 2070554, + "ts": 5333367372414.478, "dur": 18231.331, + "args": { + "External id": 291037,"Record function id": 0, "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070554, "tid": 2070554, + "ts": 5333367372423.299, "dur": 858.900, + "args": { + "External id": 291038,"Record function id": 0, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367372506.488, "dur": 9.263, + "args": { + "External id": 291039,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367372529.778, "dur": 37.797, + "args": { + "External id": 291040,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372535.444, "dur": 2.167, + "args": { + "External id": 291041,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372542.610, "dur": 0.225, + "args": { + "External id": 291042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372544.361, "dur": 0.278, + "args": { + "External id": 291043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372546.154, "dur": 0.611, + "args": { + "External id": 291044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372550.486, "dur": 0.280, + "args": { + "External id": 291045,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372552.260, "dur": 0.398, + "args": { + "External id": 291046,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372554.402, "dur": 3.602, + "args": { + "External id": 291047,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372559.466, "dur": 0.204, + "args": { + "External id": 291048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372561.196, "dur": 0.210, + "args": { + "External id": 291049,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367372578.821, "dur": 79.901, + "args": { + "External id": 291050,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367372696.638, "dur": 127.588, + "args": { + "External id": 291051,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367372708.636, "dur": 5.823, + "args": { + "External id": 291052,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367372719.648, "dur": 11.223, + "args": { + "External id": 291053,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367372724.023, "dur": 6.391, + "args": { + "External id": 291054,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372727.906, "dur": 0.847, + "args": { + "External id": 291055,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367372738.382, "dur": 31.696, + "args": { + "External id": 291056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372740.931, "dur": 2.953, + "args": { + "External id": 291057,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372745.457, "dur": 0.247, + "args": { + "External id": 291058,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372747.221, "dur": 0.216, + "args": { + "External id": 291059,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372751.437, "dur": 1.270, + "args": { + "External id": 291060,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372754.198, "dur": 0.334, + "args": { + "External id": 291061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372756.018, "dur": 0.379, + "args": { + "External id": 291062,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372759.802, "dur": 0.190, + "args": { + "External id": 291063,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372761.406, "dur": 0.167, + "args": { + "External id": 291064,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367372763.401, "dur": 2.480, + "args": { + "External id": 291065,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367372786.094, "dur": 29.745, + "args": { + "External id": 291066,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367372878.601, "dur": 308.305, + "args": { + "External id": 291067,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367372909.752, "dur": 255.721, + "args": { + "External id": 291068,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6600, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367372920.725, "dur": 239.266, + "args": { + "External id": 291069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367373212.486, "dur": 3.459, + "args": { + "External id": 291070,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6602, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070554, "tid": 2070554, + "ts": 5333367373303.622, "dur": 17113.401, + "args": { + "External id": 291071,"Record function id": 0, "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373402.758, "dur": 6.369, + "args": { + "External id": 291072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373412.337, "dur": 1.009, + "args": { + "External id": 291073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373415.211, "dur": 2.555, + "args": { + "External id": 291074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373419.529, "dur": 0.932, + "args": { + "External id": 291075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373421.753, "dur": 0.870, + "args": { + "External id": 291076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373423.912, "dur": 0.675, + "args": { + "External id": 291077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373428.439, "dur": 0.666, + "args": { + "External id": 291078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373430.843, "dur": 1.873, + "args": { + "External id": 291079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373434.478, "dur": 0.680, + "args": { + "External id": 291080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367373436.525, "dur": 1.107, + "args": { + "External id": 291081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367373458.794, "dur": 16912.545, + "args": { + "External id": 291082,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367373475.507, "dur": 16887.951, + "args": { + "External id": 291083,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367373500.215, "dur": 14.947, + "args": { + "External id": 291084,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367373518.646, "dur": 16808.018, + "args": { + "External id": 291085,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367373521.159, "dur": 16804.861, + "args": { + "External id": 291086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367373527.335, "dur": 5.103, + "args": { + "External id": 291087,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367373534.119, "dur": 16788.818, + "args": { + "External id": 291088,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367390561.137, "dur": 32.499, + "args": { + "External id": 291089,"Sequence number": 1209203, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6621 + } + }, + { + "ph": "s", "id": 29, "pid": 2070554, "tid": 2070554, "ts": 5333367390561.137, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367390579.303, "dur": 9.129, + "args": { + "External id": 291090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367390583.162, "dur": 5.101, + "args": { + "External id": 291091,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367390689.926, "dur": 95.624, + "args": { + "External id": 291092,"Record function id": 0, "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367390787.484, "dur": 1121.101, + "args": { + "External id": 291093,"Record function id": 0, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367390836.504, "dur": 1058.686, + "args": { + "External id": 291094,"Sequence number": 1209204, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6626 + } + }, + { + "ph": "s", "id": 28, "pid": 2070554, "tid": 2070554, "ts": 5333367390836.504, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367390907.893, "dur": 47.941, + "args": { + "External id": 291095,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367390968.356, "dur": 106.944, + "args": { + "External id": 291096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367391087.529, "dur": 41.572, + "args": { + "External id": 291097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367391135.812, "dur": 47.706, + "args": { + "External id": 291098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367391216.636, "dur": 29.363, + "args": { + "External id": 291099,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367391262.205, "dur": 15.389, + "args": { + "External id": 291100,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367391297.448, "dur": 133.881, + "args": { + "External id": 291101,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367391348.871, "dur": 11.905, + "args": { + "External id": 291102,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367391354.312, "dur": 5.688, + "args": { + "External id": 291103,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367391363.688, "dur": 4.410, + "args": { + "External id": 291104,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367391369.590, "dur": 1.487, + "args": { + "External id": 291105,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367391375.937, "dur": 3.600, + "args": { + "External id": 291106,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367391441.778, "dur": 50.432, + "args": { + "External id": 291107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367391522.359, "dur": 29.394, + "args": { + "External id": 291108,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367391561.277, "dur": 42.299, + "args": { + "External id": 291109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367391610.082, "dur": 71.464, + "args": { + "External id": 291110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367391710.022, "dur": 30.640, + "args": { + "External id": 291111,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367391746.872, "dur": 38.826, + "args": { + "External id": 291112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367391804.294, "dur": 19.521, + "args": { + "External id": 291113,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2070554, "tid": 2070554, + "ts": 5333367391973.595, "dur": 73.165, + "args": { + "External id": 291114,"Record function id": 0, "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367392119.068, "dur": 62.421, + "args": { + "External id": 291115,"Record function id": 0, "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2070554, "tid": 2070554, + "ts": 5333367392194.466, "dur": 18248.615, + "args": { + "External id": 291116,"Record function id": 0, "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070554, "tid": 2070554, + "ts": 5333367392203.290, "dur": 876.929, + "args": { + "External id": 291117,"Record function id": 0, "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367392285.664, "dur": 10.647, + "args": { + "External id": 291118,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367392310.242, "dur": 38.983, + "args": { + "External id": 291119,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392315.942, "dur": 2.509, + "args": { + "External id": 291120,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392323.047, "dur": 0.218, + "args": { + "External id": 291121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392324.860, "dur": 0.381, + "args": { + "External id": 291122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392327.362, "dur": 0.478, + "args": { + "External id": 291123,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392331.200, "dur": 0.215, + "args": { + "External id": 291124,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392332.831, "dur": 0.334, + "args": { + "External id": 291125,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392334.618, "dur": 2.735, + "args": { + "External id": 291126,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392338.874, "dur": 2.166, + "args": { + "External id": 291127,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392342.339, "dur": 0.342, + "args": { + "External id": 291128,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367392364.020, "dur": 44.556, + "args": { + "External id": 291129,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367392442.221, "dur": 121.222, + "args": { + "External id": 291130,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367392453.173, "dur": 7.348, + "args": { + "External id": 291131,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367392465.617, "dur": 10.510, + "args": { + "External id": 291132,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367392470.339, "dur": 5.344, + "args": { + "External id": 291133,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392473.854, "dur": 0.625, + "args": { + "External id": 291134,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367392483.736, "dur": 31.254, + "args": { + "External id": 291135,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392485.920, "dur": 0.420, + "args": { + "External id": 291136,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392488.191, "dur": 3.147, + "args": { + "External id": 291137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392492.872, "dur": 0.522, + "args": { + "External id": 291138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392494.838, "dur": 0.407, + "args": { + "External id": 291139,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392499.167, "dur": 1.764, + "args": { + "External id": 291140,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392502.676, "dur": 0.375, + "args": { + "External id": 291141,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392504.782, "dur": 0.354, + "args": { + "External id": 291142,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392508.654, "dur": 0.382, + "args": { + "External id": 291143,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367392510.354, "dur": 0.354, + "args": { + "External id": 291144,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367392528.003, "dur": 27.365, + "args": { + "External id": 291145,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367392616.136, "dur": 367.783, + "args": { + "External id": 291146,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367392683.861, "dur": 294.767, + "args": { + "External id": 291147,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6679, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367392695.835, "dur": 276.704, + "args": { + "External id": 291148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367393007.010, "dur": 2.266, + "args": { + "External id": 291149,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6681, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070554, "tid": 2070554, + "ts": 5333367393103.713, "dur": 17139.203, + "args": { + "External id": 291150,"Record function id": 0, "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393224.902, "dur": 6.577, + "args": { + "External id": 291151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393235.457, "dur": 1.287, + "args": { + "External id": 291152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393238.323, "dur": 0.722, + "args": { + "External id": 291153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393240.918, "dur": 2.040, + "args": { + "External id": 291154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393244.614, "dur": 0.826, + "args": { + "External id": 291155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393246.834, "dur": 0.989, + "args": { + "External id": 291156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393251.554, "dur": 0.511, + "args": { + "External id": 291157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393253.767, "dur": 2.099, + "args": { + "External id": 291158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393257.594, "dur": 0.766, + "args": { + "External id": 291159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367393259.874, "dur": 0.619, + "args": { + "External id": 291160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367393282.936, "dur": 16916.957, + "args": { + "External id": 291161,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367393299.171, "dur": 16892.821, + "args": { + "External id": 291162,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367393321.992, "dur": 17.846, + "args": { + "External id": 291163,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367393343.322, "dur": 16803.099, + "args": { + "External id": 291164,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367393345.978, "dur": 16799.706, + "args": { + "External id": 291165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367393352.487, "dur": 6.798, + "args": { + "External id": 291166,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367393361.225, "dur": 16781.371, + "args": { + "External id": 291167,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367410383.957, "dur": 33.668, + "args": { + "External id": 291168,"Sequence number": 1209205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6700 + } + }, + { + "ph": "s", "id": 27, "pid": 2070554, "tid": 2070554, "ts": 5333367410383.957, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367410403.236, "dur": 9.101, + "args": { + "External id": 291169,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367410406.947, "dur": 5.184, + "args": { + "External id": 291170,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367410481.567, "dur": 94.519, + "args": { + "External id": 291171,"Record function id": 0, "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367410577.904, "dur": 1116.339, + "args": { + "External id": 291172,"Record function id": 0, "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367410617.642, "dur": 1062.030, + "args": { + "External id": 291173,"Sequence number": 1209206, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6705 + } + }, + { + "ph": "s", "id": 26, "pid": 2070554, "tid": 2070554, "ts": 5333367410617.642, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367410719.664, "dur": 48.054, + "args": { + "External id": 291174,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367410781.264, "dur": 105.973, + "args": { + "External id": 291175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367410897.193, "dur": 39.511, + "args": { + "External id": 291176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367410945.709, "dur": 30.912, + "args": { + "External id": 291177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367411002.226, "dur": 27.116, + "args": { + "External id": 291178,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367411048.750, "dur": 14.705, + "args": { + "External id": 291179,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367411082.258, "dur": 146.380, + "args": { + "External id": 291180,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367411132.904, "dur": 11.713, + "args": { + "External id": 291181,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367411137.818, "dur": 5.705, + "args": { + "External id": 291182,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367411147.289, "dur": 4.383, + "args": { + "External id": 291183,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367411153.047, "dur": 1.212, + "args": { + "External id": 291184,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367411156.944, "dur": 3.065, + "args": { + "External id": 291185,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367411240.123, "dur": 49.541, + "args": { + "External id": 291186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367411319.858, "dur": 29.081, + "args": { + "External id": 291187,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367411356.190, "dur": 42.247, + "args": { + "External id": 291188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367411407.927, "dur": 34.423, + "args": { + "External id": 291189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367411465.729, "dur": 27.099, + "args": { + "External id": 291190,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367411498.402, "dur": 34.420, + "args": { + "External id": 291191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367411553.083, "dur": 19.100, + "args": { + "External id": 291192,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2070554, "tid": 2070554, + "ts": 5333367411761.569, "dur": 78.037, + "args": { + "External id": 291193,"Record function id": 0, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367411910.842, "dur": 48.792, + "args": { + "External id": 291194,"Record function id": 0, "Ev Idx": 6726 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2070554, "tid": 2070554, + "ts": 5333367411968.753, "dur": 18237.836, + "args": { + "External id": 291195,"Record function id": 0, "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070554, "tid": 2070554, + "ts": 5333367411978.388, "dur": 875.665, + "args": { + "External id": 291196,"Record function id": 0, "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367412058.956, "dur": 9.620, + "args": { + "External id": 291197,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367412082.945, "dur": 36.288, + "args": { + "External id": 291198,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412088.093, "dur": 2.301, + "args": { + "External id": 291199,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412095.547, "dur": 0.227, + "args": { + "External id": 291200,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412096.801, "dur": 0.363, + "args": { + "External id": 291201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412098.575, "dur": 0.418, + "args": { + "External id": 291202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412102.557, "dur": 0.805, + "args": { + "External id": 291203,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412104.348, "dur": 0.418, + "args": { + "External id": 291204,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412105.937, "dur": 3.843, + "args": { + "External id": 291205,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412110.995, "dur": 0.413, + "args": { + "External id": 291206,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412112.465, "dur": 0.280, + "args": { + "External id": 291207,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367412130.072, "dur": 65.796, + "args": { + "External id": 291208,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367412233.789, "dur": 123.908, + "args": { + "External id": 291209,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367412245.365, "dur": 5.399, + "args": { + "External id": 291210,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367412255.820, "dur": 11.185, + "args": { + "External id": 291211,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367412260.445, "dur": 6.114, + "args": { + "External id": 291212,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412264.250, "dur": 0.704, + "args": { + "External id": 291213,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367412274.562, "dur": 30.731, + "args": { + "External id": 291214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412277.172, "dur": 2.947, + "args": { + "External id": 291215,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412281.662, "dur": 0.449, + "args": { + "External id": 291216,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412283.172, "dur": 0.488, + "args": { + "External id": 291217,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412287.738, "dur": 1.325, + "args": { + "External id": 291218,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412289.874, "dur": 0.523, + "args": { + "External id": 291219,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412291.576, "dur": 0.218, + "args": { + "External id": 291220,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412295.005, "dur": 0.243, + "args": { + "External id": 291221,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412296.347, "dur": 0.344, + "args": { + "External id": 291222,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367412297.902, "dur": 2.589, + "args": { + "External id": 291223,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367412321.642, "dur": 28.049, + "args": { + "External id": 291224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367412411.044, "dur": 343.974, + "args": { + "External id": 291225,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367412441.257, "dur": 308.921, + "args": { + "External id": 291226,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6758, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367412452.019, "dur": 291.692, + "args": { + "External id": 291227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367412781.378, "dur": 2.488, + "args": { + "External id": 291228,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6760, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070554, "tid": 2070554, + "ts": 5333367412875.816, "dur": 17119.924, + "args": { + "External id": 291229,"Record function id": 0, "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367412973.993, "dur": 6.476, + "args": { + "External id": 291230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367412983.659, "dur": 1.064, + "args": { + "External id": 291231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367412986.289, "dur": 2.045, + "args": { + "External id": 291232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367412990.025, "dur": 0.851, + "args": { + "External id": 291233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367412992.319, "dur": 0.693, + "args": { + "External id": 291234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367412994.409, "dur": 0.618, + "args": { + "External id": 291235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367413012.984, "dur": 1.031, + "args": { + "External id": 291236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367413015.839, "dur": 1.734, + "args": { + "External id": 291237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367413018.758, "dur": 0.782, + "args": { + "External id": 291238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367413020.760, "dur": 0.423, + "args": { + "External id": 291239,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367413043.895, "dur": 16908.050, + "args": { + "External id": 291240,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367413059.827, "dur": 16884.683, + "args": { + "External id": 291241,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367413079.120, "dur": 15.546, + "args": { + "External id": 291242,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367413098.104, "dur": 16810.422, + "args": { + "External id": 291243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367413100.775, "dur": 16806.863, + "args": { + "External id": 291244,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367413107.334, "dur": 5.846, + "args": { + "External id": 291245,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367413114.987, "dur": 16789.398, + "args": { + "External id": 291246,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367430133.432, "dur": 32.277, + "args": { + "External id": 291247,"Sequence number": 1209207, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6779 + } + }, + { + "ph": "s", "id": 25, "pid": 2070554, "tid": 2070554, "ts": 5333367430133.432, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367430152.144, "dur": 8.647, + "args": { + "External id": 291248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367430155.739, "dur": 4.814, + "args": { + "External id": 291249,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367430245.698, "dur": 93.269, + "args": { + "External id": 291250,"Record function id": 0, "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367430340.840, "dur": 1133.045, + "args": { + "External id": 291251,"Record function id": 0, "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367430385.900, "dur": 1074.893, + "args": { + "External id": 291252,"Sequence number": 1209208, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6784 + } + }, + { + "ph": "s", "id": 24, "pid": 2070554, "tid": 2070554, "ts": 5333367430385.900, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367430459.171, "dur": 49.552, + "args": { + "External id": 291253,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367430522.555, "dur": 138.271, + "args": { + "External id": 291254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367430675.319, "dur": 45.841, + "args": { + "External id": 291255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367430730.934, "dur": 30.923, + "args": { + "External id": 291256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367430795.618, "dur": 32.738, + "args": { + "External id": 291257,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367430844.437, "dur": 15.796, + "args": { + "External id": 291258,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367430878.923, "dur": 128.860, + "args": { + "External id": 291259,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367430927.848, "dur": 12.280, + "args": { + "External id": 291260,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367430933.294, "dur": 6.127, + "args": { + "External id": 291261,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367430942.883, "dur": 4.059, + "args": { + "External id": 291262,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367430947.983, "dur": 1.539, + "args": { + "External id": 291263,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367430952.045, "dur": 5.397, + "args": { + "External id": 291264,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367431018.733, "dur": 49.857, + "args": { + "External id": 291265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367431101.377, "dur": 29.091, + "args": { + "External id": 291266,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367431138.823, "dur": 59.025, + "args": { + "External id": 291267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367431211.423, "dur": 39.570, + "args": { + "External id": 291268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367431279.732, "dur": 26.823, + "args": { + "External id": 291269,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367431312.343, "dur": 36.813, + "args": { + "External id": 291270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367431367.416, "dur": 23.868, + "args": { + "External id": 291271,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2070554, "tid": 2070554, + "ts": 5333367431539.069, "dur": 75.527, + "args": { + "External id": 291272,"Record function id": 0, "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367431728.907, "dur": 48.984, + "args": { + "External id": 291273,"Record function id": 0, "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2070554, "tid": 2070554, + "ts": 5333367431788.598, "dur": 18274.556, + "args": { + "External id": 291274,"Record function id": 0, "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070554, "tid": 2070554, + "ts": 5333367431798.551, "dur": 797.289, + "args": { + "External id": 291275,"Record function id": 0, "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367431881.477, "dur": 9.674, + "args": { + "External id": 291276,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367431904.565, "dur": 34.729, + "args": { + "External id": 291277,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431909.910, "dur": 2.479, + "args": { + "External id": 291278,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431917.287, "dur": 0.256, + "args": { + "External id": 291279,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431918.789, "dur": 0.412, + "args": { + "External id": 291280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431920.298, "dur": 0.549, + "args": { + "External id": 291281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431924.068, "dur": 0.186, + "args": { + "External id": 291282,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431925.225, "dur": 0.583, + "args": { + "External id": 291283,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431926.787, "dur": 3.444, + "args": { + "External id": 291284,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431931.189, "dur": 0.312, + "args": { + "External id": 291285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367431932.867, "dur": 0.197, + "args": { + "External id": 291286,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367431951.603, "dur": 45.372, + "args": { + "External id": 291287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367432030.309, "dur": 110.653, + "args": { + "External id": 291288,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367432041.507, "dur": 4.310, + "args": { + "External id": 291289,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367432050.861, "dur": 10.465, + "args": { + "External id": 291290,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367432055.645, "dur": 5.260, + "args": { + "External id": 291291,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432058.938, "dur": 0.599, + "args": { + "External id": 291292,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367432067.616, "dur": 28.364, + "args": { + "External id": 291293,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432069.553, "dur": 2.244, + "args": { + "External id": 291294,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432073.431, "dur": 0.458, + "args": { + "External id": 291295,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432075.259, "dur": 0.656, + "args": { + "External id": 291296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432079.315, "dur": 1.324, + "args": { + "External id": 291297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432081.653, "dur": 0.334, + "args": { + "External id": 291298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432083.172, "dur": 0.152, + "args": { + "External id": 291299,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432086.705, "dur": 0.385, + "args": { + "External id": 291300,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432087.881, "dur": 0.171, + "args": { + "External id": 291301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432088.981, "dur": 2.462, + "args": { + "External id": 291302,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367432108.622, "dur": 24.023, + "args": { + "External id": 291303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367432213.703, "dur": 293.915, + "args": { + "External id": 291304,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367432245.345, "dur": 258.257, + "args": { + "External id": 291305,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6837, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367432256.424, "dur": 241.876, + "args": { + "External id": 291306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367432528.449, "dur": 2.477, + "args": { + "External id": 291307,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6839, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070554, "tid": 2070554, + "ts": 5333367432615.805, "dur": 17243.893, + "args": { + "External id": 291308,"Record function id": 0, "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432755.209, "dur": 6.693, + "args": { + "External id": 291309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432765.634, "dur": 0.764, + "args": { + "External id": 291310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432768.114, "dur": 2.108, + "args": { + "External id": 291311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432771.860, "dur": 0.826, + "args": { + "External id": 291312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432774.013, "dur": 0.771, + "args": { + "External id": 291313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432776.139, "dur": 0.740, + "args": { + "External id": 291314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432780.686, "dur": 1.207, + "args": { + "External id": 291315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432783.438, "dur": 1.857, + "args": { + "External id": 291316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432786.632, "dur": 0.723, + "args": { + "External id": 291317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367432788.958, "dur": 0.868, + "args": { + "External id": 291318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367432812.935, "dur": 16998.738, + "args": { + "External id": 291319,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367432829.070, "dur": 16974.339, + "args": { + "External id": 291320,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367432854.773, "dur": 15.549, + "args": { + "External id": 291321,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367432873.945, "dur": 16892.193, + "args": { + "External id": 291322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367432876.650, "dur": 16888.625, + "args": { + "External id": 291323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367432882.921, "dur": 5.589, + "args": { + "External id": 291324,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367432890.173, "dur": 16871.401, + "args": { + "External id": 291325,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367450005.464, "dur": 32.810, + "args": { + "External id": 291326,"Sequence number": 1209209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6858 + } + }, + { + "ph": "s", "id": 23, "pid": 2070554, "tid": 2070554, "ts": 5333367450005.464, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367450023.776, "dur": 9.494, + "args": { + "External id": 291327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367450027.773, "dur": 5.265, + "args": { + "External id": 291328,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367450126.315, "dur": 111.842, + "args": { + "External id": 291329,"Record function id": 0, "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367450241.507, "dur": 1119.950, + "args": { + "External id": 291330,"Record function id": 0, "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367450287.260, "dur": 1061.497, + "args": { + "External id": 291331,"Sequence number": 1209210, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6863 + } + }, + { + "ph": "s", "id": 22, "pid": 2070554, "tid": 2070554, "ts": 5333367450287.260, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367450358.891, "dur": 51.128, + "args": { + "External id": 291332,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367450424.376, "dur": 109.494, + "args": { + "External id": 291333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367450546.218, "dur": 38.896, + "args": { + "External id": 291334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367450591.730, "dur": 68.392, + "args": { + "External id": 291335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367450695.061, "dur": 29.334, + "args": { + "External id": 291336,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367450738.964, "dur": 17.079, + "args": { + "External id": 291337,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367450775.529, "dur": 131.584, + "args": { + "External id": 291338,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367450828.646, "dur": 11.096, + "args": { + "External id": 291339,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367450833.954, "dur": 4.986, + "args": { + "External id": 291340,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367450842.140, "dur": 4.089, + "args": { + "External id": 291341,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367450847.538, "dur": 1.211, + "args": { + "External id": 291342,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367450853.131, "dur": 2.769, + "args": { + "External id": 291343,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367450918.415, "dur": 52.353, + "args": { + "External id": 291344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367451000.899, "dur": 31.566, + "args": { + "External id": 291345,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367451039.798, "dur": 42.296, + "args": { + "External id": 291346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367451088.332, "dur": 35.274, + "args": { + "External id": 291347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367451145.578, "dur": 45.215, + "args": { + "External id": 291348,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367451198.231, "dur": 39.961, + "args": { + "External id": 291349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367451257.870, "dur": 20.222, + "args": { + "External id": 291350,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2070554, "tid": 2070554, + "ts": 5333367451425.240, "dur": 73.595, + "args": { + "External id": 291351,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367451568.208, "dur": 45.668, + "args": { + "External id": 291352,"Record function id": 0, "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2070554, "tid": 2070554, + "ts": 5333367451659.573, "dur": 18370.678, + "args": { + "External id": 291353,"Record function id": 0, "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070554, "tid": 2070554, + "ts": 5333367451669.550, "dur": 802.250, + "args": { + "External id": 291354,"Record function id": 0, "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367451754.312, "dur": 10.007, + "args": { + "External id": 291355,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367451778.690, "dur": 35.995, + "args": { + "External id": 291356,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451784.296, "dur": 2.143, + "args": { + "External id": 291357,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451791.335, "dur": 0.285, + "args": { + "External id": 291358,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451793.002, "dur": 0.368, + "args": { + "External id": 291359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451795.045, "dur": 0.267, + "args": { + "External id": 291360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451799.059, "dur": 0.615, + "args": { + "External id": 291361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451800.870, "dur": 0.611, + "args": { + "External id": 291362,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451802.336, "dur": 3.440, + "args": { + "External id": 291363,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451807.166, "dur": 0.219, + "args": { + "External id": 291364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451808.190, "dur": 0.166, + "args": { + "External id": 291365,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367451826.631, "dur": 43.379, + "args": { + "External id": 291366,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367451903.547, "dur": 109.047, + "args": { + "External id": 291367,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367451914.134, "dur": 4.006, + "args": { + "External id": 291368,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367451923.381, "dur": 9.652, + "args": { + "External id": 291369,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367451927.715, "dur": 4.876, + "args": { + "External id": 291370,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451930.778, "dur": 0.515, + "args": { + "External id": 291371,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367451939.620, "dur": 26.870, + "args": { + "External id": 291372,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451941.279, "dur": 2.900, + "args": { + "External id": 291373,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451945.750, "dur": 0.469, + "args": { + "External id": 291374,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451947.112, "dur": 0.343, + "args": { + "External id": 291375,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451950.871, "dur": 1.181, + "args": { + "External id": 291376,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451952.761, "dur": 0.163, + "args": { + "External id": 291377,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451953.678, "dur": 0.421, + "args": { + "External id": 291378,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451957.373, "dur": 0.151, + "args": { + "External id": 291379,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451958.461, "dur": 0.178, + "args": { + "External id": 291380,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367451959.553, "dur": 3.125, + "args": { + "External id": 291381,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367451981.110, "dur": 23.761, + "args": { + "External id": 291382,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367452065.222, "dur": 312.061, + "args": { + "External id": 291383,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367452095.466, "dur": 276.518, + "args": { + "External id": 291384,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6916, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367452105.741, "dur": 259.466, + "args": { + "External id": 291385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367452400.669, "dur": 2.638, + "args": { + "External id": 291386,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6918, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070554, "tid": 2070554, + "ts": 5333367452493.649, "dur": 17337.444, + "args": { + "External id": 291387,"Record function id": 0, "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452591.772, "dur": 6.104, + "args": { + "External id": 291388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452600.949, "dur": 0.798, + "args": { + "External id": 291389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452603.315, "dur": 1.792, + "args": { + "External id": 291390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452606.888, "dur": 0.700, + "args": { + "External id": 291391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452609.192, "dur": 0.543, + "args": { + "External id": 291392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452610.939, "dur": 0.723, + "args": { + "External id": 291393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452615.971, "dur": 0.910, + "args": { + "External id": 291394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452665.788, "dur": 2.708, + "args": { + "External id": 291395,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452672.957, "dur": 0.647, + "args": { + "External id": 291396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367452675.043, "dur": 0.455, + "args": { + "External id": 291397,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367452698.100, "dur": 17086.537, + "args": { + "External id": 291398,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367452715.417, "dur": 17061.187, + "args": { + "External id": 291399,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367452747.384, "dur": 16.285, + "args": { + "External id": 291400,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367452767.197, "dur": 16972.737, + "args": { + "External id": 291401,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367452769.304, "dur": 16969.853, + "args": { + "External id": 291402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367452775.912, "dur": 6.118, + "args": { + "External id": 291403,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367452783.566, "dur": 16952.120, + "args": { + "External id": 291404,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367469973.970, "dur": 31.179, + "args": { + "External id": 291405,"Sequence number": 1209211, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6937 + } + }, + { + "ph": "s", "id": 21, "pid": 2070554, "tid": 2070554, "ts": 5333367469973.970, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367469990.910, "dur": 8.962, + "args": { + "External id": 291406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367469994.671, "dur": 4.946, + "args": { + "External id": 291407,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367470069.804, "dur": 95.616, + "args": { + "External id": 291408,"Record function id": 0, "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367470181.876, "dur": 1119.689, + "args": { + "External id": 291409,"Record function id": 0, "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367470227.729, "dur": 1061.103, + "args": { + "External id": 291410,"Sequence number": 1209212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6942 + } + }, + { + "ph": "s", "id": 20, "pid": 2070554, "tid": 2070554, "ts": 5333367470227.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367470297.817, "dur": 47.870, + "args": { + "External id": 291411,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367470358.473, "dur": 106.717, + "args": { + "External id": 291412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367470476.276, "dur": 38.652, + "args": { + "External id": 291413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367470524.074, "dur": 32.620, + "args": { + "External id": 291414,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367470582.676, "dur": 28.738, + "args": { + "External id": 291415,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367470665.543, "dur": 20.514, + "args": { + "External id": 291416,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367470714.467, "dur": 133.915, + "args": { + "External id": 291417,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367470770.796, "dur": 12.357, + "args": { + "External id": 291418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367470775.972, "dur": 6.315, + "args": { + "External id": 291419,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367470785.663, "dur": 4.000, + "args": { + "External id": 291420,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367470790.733, "dur": 1.366, + "args": { + "External id": 291421,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367470794.380, "dur": 2.481, + "args": { + "External id": 291422,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367470858.800, "dur": 52.544, + "args": { + "External id": 291423,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367470943.073, "dur": 29.021, + "args": { + "External id": 291424,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367470980.472, "dur": 42.084, + "args": { + "External id": 291425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367471031.376, "dur": 35.576, + "args": { + "External id": 291426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367471090.351, "dur": 27.233, + "args": { + "External id": 291427,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367471122.931, "dur": 34.988, + "args": { + "External id": 291428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367471197.189, "dur": 20.127, + "args": { + "External id": 291429,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2070554, "tid": 2070554, + "ts": 5333367471367.498, "dur": 76.046, + "args": { + "External id": 291430,"Record function id": 0, "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367471515.737, "dur": 45.121, + "args": { + "External id": 291431,"Record function id": 0, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.26)", "pid": 2070554, "tid": 2070554, + "ts": 5333367471569.577, "dur": 18353.762, + "args": { + "External id": 291432,"Record function id": 0, "Ev Idx": 6964 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070554, "tid": 2070554, + "ts": 5333367471578.474, "dur": 845.303, + "args": { + "External id": 291433,"Record function id": 0, "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367471700.612, "dur": 10.345, + "args": { + "External id": 291434,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367471725.157, "dur": 34.016, + "args": { + "External id": 291435,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471730.613, "dur": 2.353, + "args": { + "External id": 291436,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471737.087, "dur": 0.234, + "args": { + "External id": 291437,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471738.375, "dur": 0.456, + "args": { + "External id": 291438,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471739.916, "dur": 0.380, + "args": { + "External id": 291439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471743.296, "dur": 0.421, + "args": { + "External id": 291440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471744.936, "dur": 0.516, + "args": { + "External id": 291441,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471746.590, "dur": 4.046, + "args": { + "External id": 291442,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471751.575, "dur": 0.187, + "args": { + "External id": 291443,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471752.785, "dur": 0.334, + "args": { + "External id": 291444,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367471771.342, "dur": 43.692, + "args": { + "External id": 291445,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367471850.040, "dur": 116.455, + "args": { + "External id": 291446,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367471861.586, "dur": 3.974, + "args": { + "External id": 291447,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367471870.955, "dur": 10.845, + "args": { + "External id": 291448,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367471875.908, "dur": 5.452, + "args": { + "External id": 291449,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471879.140, "dur": 0.601, + "args": { + "External id": 291450,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367471894.021, "dur": 25.030, + "args": { + "External id": 291451,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471895.580, "dur": 3.031, + "args": { + "External id": 291452,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471899.629, "dur": 0.228, + "args": { + "External id": 291453,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471900.518, "dur": 0.267, + "args": { + "External id": 291454,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471903.898, "dur": 1.191, + "args": { + "External id": 291455,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471905.964, "dur": 0.154, + "args": { + "External id": 291456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471906.728, "dur": 0.165, + "args": { + "External id": 291457,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471909.812, "dur": 0.259, + "args": { + "External id": 291458,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471910.818, "dur": 0.301, + "args": { + "External id": 291459,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367471912.215, "dur": 2.629, + "args": { + "External id": 291460,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367471932.633, "dur": 25.688, + "args": { + "External id": 291461,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367472019.836, "dur": 311.119, + "args": { + "External id": 291462,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367472049.131, "dur": 277.142, + "args": { + "External id": 291463,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6995, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367472059.834, "dur": 260.496, + "args": { + "External id": 291464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367472354.887, "dur": 2.322, + "args": { + "External id": 291465,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6997, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070554, "tid": 2070554, + "ts": 5333367472444.777, "dur": 17284.538, + "args": { + "External id": 291466,"Record function id": 0, "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472539.365, "dur": 6.061, + "args": { + "External id": 291467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472548.754, "dur": 1.182, + "args": { + "External id": 291468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472551.643, "dur": 1.869, + "args": { + "External id": 291469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472555.112, "dur": 0.759, + "args": { + "External id": 291470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472557.391, "dur": 0.760, + "args": { + "External id": 291471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472559.455, "dur": 0.640, + "args": { + "External id": 291472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472563.593, "dur": 0.671, + "args": { + "External id": 291473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472565.876, "dur": 2.221, + "args": { + "External id": 291474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472569.487, "dur": 1.079, + "args": { + "External id": 291475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367472571.952, "dur": 0.431, + "args": { + "External id": 291476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367472593.569, "dur": 17091.852, + "args": { + "External id": 291477,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367472608.178, "dur": 17069.404, + "args": { + "External id": 291478,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367472668.342, "dur": 16.126, + "args": { + "External id": 291479,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367472688.269, "dur": 16952.581, + "args": { + "External id": 291480,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367472690.786, "dur": 16949.247, + "args": { + "External id": 291481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367472697.113, "dur": 5.804, + "args": { + "External id": 291482,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367472704.685, "dur": 16911.936, + "args": { + "External id": 291483,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367489868.718, "dur": 31.437, + "args": { + "External id": 291484,"Sequence number": 1209213, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7016 + } + }, + { + "ph": "s", "id": 19, "pid": 2070554, "tid": 2070554, "ts": 5333367489868.718, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367489885.892, "dur": 8.952, + "args": { + "External id": 291485,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367489889.404, "dur": 5.245, + "args": { + "External id": 291486,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367489961.832, "dur": 95.600, + "args": { + "External id": 291487,"Record function id": 0, "Ev Idx": 7019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367490059.442, "dur": 1130.079, + "args": { + "External id": 291488,"Record function id": 0, "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367490100.108, "dur": 1060.455, + "args": { + "External id": 291489,"Sequence number": 1209214, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7021 + } + }, + { + "ph": "s", "id": 18, "pid": 2070554, "tid": 2070554, "ts": 5333367490100.108, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367490181.525, "dur": 49.894, + "args": { + "External id": 291490,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367490246.624, "dur": 105.420, + "args": { + "External id": 291491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367490361.582, "dur": 39.279, + "args": { + "External id": 291492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367490409.967, "dur": 31.365, + "args": { + "External id": 291493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367490468.322, "dur": 27.035, + "args": { + "External id": 291494,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367490513.204, "dur": 17.774, + "args": { + "External id": 291495,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367490549.235, "dur": 175.039, + "args": { + "External id": 291496,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367490601.315, "dur": 11.626, + "args": { + "External id": 291497,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367490606.615, "dur": 5.415, + "args": { + "External id": 291498,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367490615.560, "dur": 41.474, + "args": { + "External id": 291499,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367490659.804, "dur": 1.548, + "args": { + "External id": 291500,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367490663.707, "dur": 2.608, + "args": { + "External id": 291501,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367490735.568, "dur": 56.876, + "args": { + "External id": 291502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367490825.022, "dur": 33.210, + "args": { + "External id": 291503,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367490866.139, "dur": 43.610, + "args": { + "External id": 291504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367490917.930, "dur": 35.275, + "args": { + "External id": 291505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367490976.571, "dur": 28.210, + "args": { + "External id": 291506,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367491010.395, "dur": 34.843, + "args": { + "External id": 291507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367491066.907, "dur": 19.070, + "args": { + "External id": 291508,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.26)", "pid": 2070554, "tid": 2070554, + "ts": 5333367491258.398, "dur": 77.196, + "args": { + "External id": 291509,"Record function id": 0, "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367491408.219, "dur": 48.704, + "args": { + "External id": 291510,"Record function id": 0, "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.27)", "pid": 2070554, "tid": 2070554, + "ts": 5333367491466.023, "dur": 18330.896, + "args": { + "External id": 291511,"Record function id": 0, "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070554, "tid": 2070554, + "ts": 5333367491473.600, "dur": 893.262, + "args": { + "External id": 291512,"Record function id": 0, "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367491558.754, "dur": 9.014, + "args": { + "External id": 291513,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367491581.077, "dur": 31.438, + "args": { + "External id": 291514,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491586.064, "dur": 2.380, + "args": { + "External id": 291515,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491592.749, "dur": 0.363, + "args": { + "External id": 291516,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491593.931, "dur": 0.221, + "args": { + "External id": 291517,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491595.193, "dur": 0.215, + "args": { + "External id": 291518,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491598.642, "dur": 0.253, + "args": { + "External id": 291519,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491599.913, "dur": 0.338, + "args": { + "External id": 291520,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491601.286, "dur": 2.823, + "args": { + "External id": 291521,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491605.153, "dur": 0.277, + "args": { + "External id": 291522,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491606.264, "dur": 0.257, + "args": { + "External id": 291523,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367491660.811, "dur": 45.950, + "args": { + "External id": 291524,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367491743.118, "dur": 117.124, + "args": { + "External id": 291525,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367491754.490, "dur": 5.663, + "args": { + "External id": 291526,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367491765.253, "dur": 10.754, + "args": { + "External id": 291527,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367491769.858, "dur": 5.726, + "args": { + "External id": 291528,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491773.196, "dur": 0.726, + "args": { + "External id": 291529,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367491783.204, "dur": 29.523, + "args": { + "External id": 291530,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491785.274, "dur": 2.739, + "args": { + "External id": 291531,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491788.927, "dur": 0.491, + "args": { + "External id": 291532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491790.307, "dur": 0.516, + "args": { + "External id": 291533,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491794.704, "dur": 0.462, + "args": { + "External id": 291534,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491796.039, "dur": 0.591, + "args": { + "External id": 291535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491798.164, "dur": 0.608, + "args": { + "External id": 291536,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491802.231, "dur": 0.723, + "args": { + "External id": 291537,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491804.218, "dur": 0.465, + "args": { + "External id": 291538,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367491805.753, "dur": 2.713, + "args": { + "External id": 291539,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367491828.280, "dur": 23.424, + "args": { + "External id": 291540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367491914.516, "dur": 355.814, + "args": { + "External id": 291541,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367491944.506, "dur": 320.872, + "args": { + "External id": 291542,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7074, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367491954.871, "dur": 302.025, + "args": { + "External id": 291543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367492294.687, "dur": 2.116, + "args": { + "External id": 291544,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7076, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070554, "tid": 2070554, + "ts": 5333367492391.070, "dur": 17172.912, + "args": { + "External id": 291545,"Record function id": 0, "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492494.775, "dur": 6.719, + "args": { + "External id": 291546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492504.796, "dur": 1.433, + "args": { + "External id": 291547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492507.871, "dur": 1.196, + "args": { + "External id": 291548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492510.705, "dur": 0.841, + "args": { + "External id": 291549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492512.722, "dur": 1.119, + "args": { + "External id": 291550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492514.944, "dur": 1.258, + "args": { + "External id": 291551,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492519.680, "dur": 1.122, + "args": { + "External id": 291552,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492522.151, "dur": 2.257, + "args": { + "External id": 291553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492526.061, "dur": 1.088, + "args": { + "External id": 291554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367492528.712, "dur": 0.692, + "args": { + "External id": 291555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367492549.328, "dur": 16968.768, + "args": { + "External id": 291556,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367492565.593, "dur": 16944.513, + "args": { + "External id": 291557,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367492588.995, "dur": 14.403, + "args": { + "External id": 291558,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367492607.456, "dur": 16865.537, + "args": { + "External id": 291559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367492609.802, "dur": 16862.557, + "args": { + "External id": 291560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367492615.836, "dur": 44.896, + "args": { + "External id": 291561,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367492663.795, "dur": 16805.230, + "args": { + "External id": 291562,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367509736.453, "dur": 33.766, + "args": { + "External id": 291563,"Sequence number": 1209215, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7095 + } + }, + { + "ph": "s", "id": 17, "pid": 2070554, "tid": 2070554, "ts": 5333367509736.453, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367509755.436, "dur": 9.578, + "args": { + "External id": 291564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367509759.307, "dur": 5.304, + "args": { + "External id": 291565,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367509838.189, "dur": 97.518, + "args": { + "External id": 291566,"Record function id": 0, "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367509937.539, "dur": 1131.119, + "args": { + "External id": 291567,"Record function id": 0, "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367509979.587, "dur": 1075.812, + "args": { + "External id": 291568,"Sequence number": 1209216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7100 + } + }, + { + "ph": "s", "id": 16, "pid": 2070554, "tid": 2070554, "ts": 5333367509979.587, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367510048.501, "dur": 49.639, + "args": { + "External id": 291569,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367510111.813, "dur": 125.828, + "args": { + "External id": 291570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367510252.432, "dur": 43.300, + "args": { + "External id": 291571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367510306.097, "dur": 30.703, + "args": { + "External id": 291572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367510367.587, "dur": 34.287, + "args": { + "External id": 291573,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367510420.259, "dur": 17.085, + "args": { + "External id": 291574,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367510455.117, "dur": 134.315, + "args": { + "External id": 291575,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367510505.106, "dur": 11.544, + "args": { + "External id": 291576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367510509.893, "dur": 5.951, + "args": { + "External id": 291577,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367510522.553, "dur": 4.827, + "args": { + "External id": 291578,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367510528.683, "dur": 1.423, + "args": { + "External id": 291579,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367510534.762, "dur": 3.363, + "args": { + "External id": 291580,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367510600.301, "dur": 87.712, + "args": { + "External id": 291581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367510725.971, "dur": 31.554, + "args": { + "External id": 291582,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367510765.927, "dur": 45.166, + "args": { + "External id": 291583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367510818.088, "dur": 36.213, + "args": { + "External id": 291584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367510875.307, "dur": 29.153, + "args": { + "External id": 291585,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367510910.363, "dur": 33.952, + "args": { + "External id": 291586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367510963.381, "dur": 19.941, + "args": { + "External id": 291587,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.27)", "pid": 2070554, "tid": 2070554, + "ts": 5333367511133.847, "dur": 92.432, + "args": { + "External id": 291588,"Record function id": 0, "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367511301.157, "dur": 44.178, + "args": { + "External id": 291589,"Record function id": 0, "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.28)", "pid": 2070554, "tid": 2070554, + "ts": 5333367511354.451, "dur": 18349.966, + "args": { + "External id": 291590,"Record function id": 0, "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070554, "tid": 2070554, + "ts": 5333367511362.928, "dur": 833.194, + "args": { + "External id": 291591,"Record function id": 0, "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367511442.934, "dur": 9.367, + "args": { + "External id": 291592,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367511465.979, "dur": 34.272, + "args": { + "External id": 291593,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511471.108, "dur": 2.451, + "args": { + "External id": 291594,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511478.358, "dur": 0.800, + "args": { + "External id": 291595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511480.140, "dur": 0.567, + "args": { + "External id": 291596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511481.790, "dur": 0.469, + "args": { + "External id": 291597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511485.154, "dur": 0.533, + "args": { + "External id": 291598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511486.738, "dur": 0.708, + "args": { + "External id": 291599,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511488.445, "dur": 3.143, + "args": { + "External id": 291600,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511492.427, "dur": 0.600, + "args": { + "External id": 291601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511493.652, "dur": 0.521, + "args": { + "External id": 291602,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367511511.592, "dur": 42.301, + "args": { + "External id": 291603,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367511586.678, "dur": 151.615, + "args": { + "External id": 291604,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367511597.688, "dur": 4.751, + "args": { + "External id": 291605,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367511607.679, "dur": 10.387, + "args": { + "External id": 291606,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367511612.352, "dur": 5.308, + "args": { + "External id": 291607,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511615.607, "dur": 0.768, + "args": { + "External id": 291608,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367511660.942, "dur": 28.342, + "args": { + "External id": 291609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511663.102, "dur": 2.923, + "args": { + "External id": 291610,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511667.260, "dur": 0.640, + "args": { + "External id": 291611,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511668.956, "dur": 0.628, + "args": { + "External id": 291612,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511672.712, "dur": 0.645, + "args": { + "External id": 291613,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511674.371, "dur": 0.971, + "args": { + "External id": 291614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511676.052, "dur": 0.650, + "args": { + "External id": 291615,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511679.695, "dur": 0.583, + "args": { + "External id": 291616,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511680.990, "dur": 0.406, + "args": { + "External id": 291617,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367511682.400, "dur": 2.253, + "args": { + "External id": 291618,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367511702.726, "dur": 26.487, + "args": { + "External id": 291619,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367511792.640, "dur": 295.493, + "args": { + "External id": 291620,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367511822.428, "dur": 261.146, + "args": { + "External id": 291621,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7153, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367511832.974, "dur": 244.443, + "args": { + "External id": 291622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367512108.110, "dur": 2.335, + "args": { + "External id": 291623,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7155, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070554, "tid": 2070554, + "ts": 5333367512220.691, "dur": 17254.750, + "args": { + "External id": 291624,"Record function id": 0, "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512326.602, "dur": 6.389, + "args": { + "External id": 291625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512336.595, "dur": 0.944, + "args": { + "External id": 291626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512339.210, "dur": 1.125, + "args": { + "External id": 291627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512341.895, "dur": 1.333, + "args": { + "External id": 291628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512344.441, "dur": 1.085, + "args": { + "External id": 291629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512346.857, "dur": 1.065, + "args": { + "External id": 291630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512351.819, "dur": 1.042, + "args": { + "External id": 291631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512354.308, "dur": 3.188, + "args": { + "External id": 291632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512359.167, "dur": 0.842, + "args": { + "External id": 291633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367512361.541, "dur": 0.883, + "args": { + "External id": 291634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367512383.190, "dur": 17048.583, + "args": { + "External id": 291635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367512401.727, "dur": 17022.059, + "args": { + "External id": 291636,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367512425.535, "dur": 14.342, + "args": { + "External id": 291637,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367512443.503, "dur": 16944.085, + "args": { + "External id": 291638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367512446.196, "dur": 16940.710, + "args": { + "External id": 291639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367512452.421, "dur": 8.910, + "args": { + "External id": 291640,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367512462.821, "dur": 16921.074, + "args": { + "External id": 291641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367529618.400, "dur": 59.077, + "args": { + "External id": 291642,"Sequence number": 1209217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7174 + } + }, + { + "ph": "s", "id": 15, "pid": 2070554, "tid": 2070554, "ts": 5333367529618.400, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367529662.970, "dur": 9.520, + "args": { + "External id": 291643,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367529666.867, "dur": 5.223, + "args": { + "External id": 291644,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367529746.517, "dur": 93.589, + "args": { + "External id": 291645,"Record function id": 0, "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367529841.570, "dur": 1119.822, + "args": { + "External id": 291646,"Record function id": 0, "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367529882.791, "dur": 1064.827, + "args": { + "External id": 291647,"Sequence number": 1209218, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7179 + } + }, + { + "ph": "s", "id": 14, "pid": 2070554, "tid": 2070554, "ts": 5333367529882.791, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367529950.521, "dur": 48.053, + "args": { + "External id": 291648,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367530013.597, "dur": 108.177, + "args": { + "External id": 291649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367530130.700, "dur": 56.069, + "args": { + "External id": 291650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367530199.064, "dur": 35.931, + "args": { + "External id": 291651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367530263.085, "dur": 29.456, + "args": { + "External id": 291652,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367530310.083, "dur": 16.786, + "args": { + "External id": 291653,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367530344.411, "dur": 130.335, + "args": { + "External id": 291654,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367530395.828, "dur": 11.809, + "args": { + "External id": 291655,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367530400.717, "dur": 6.085, + "args": { + "External id": 291656,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367530409.992, "dur": 4.467, + "args": { + "External id": 291657,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367530415.890, "dur": 1.739, + "args": { + "External id": 291658,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367530419.869, "dur": 3.230, + "args": { + "External id": 291659,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367530486.516, "dur": 47.403, + "args": { + "External id": 291660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367530565.529, "dur": 29.434, + "args": { + "External id": 291661,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367530603.404, "dur": 82.022, + "args": { + "External id": 291662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367530698.982, "dur": 38.541, + "args": { + "External id": 291663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367530762.622, "dur": 30.904, + "args": { + "External id": 291664,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367530799.572, "dur": 35.891, + "args": { + "External id": 291665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367530855.411, "dur": 19.105, + "args": { + "External id": 291666,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.28)", "pid": 2070554, "tid": 2070554, + "ts": 5333367531026.452, "dur": 73.357, + "args": { + "External id": 291667,"Record function id": 0, "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367531192.209, "dur": 50.615, + "args": { + "External id": 291668,"Record function id": 0, "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.29)", "pid": 2070554, "tid": 2070554, + "ts": 5333367531252.200, "dur": 18316.720, + "args": { + "External id": 291669,"Record function id": 0, "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070554, "tid": 2070554, + "ts": 5333367531259.279, "dur": 833.648, + "args": { + "External id": 291670,"Record function id": 0, "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367531341.229, "dur": 10.136, + "args": { + "External id": 291671,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367531365.693, "dur": 35.077, + "args": { + "External id": 291672,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531370.934, "dur": 2.424, + "args": { + "External id": 291673,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531377.737, "dur": 0.625, + "args": { + "External id": 291674,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531379.607, "dur": 1.023, + "args": { + "External id": 291675,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531381.712, "dur": 0.661, + "args": { + "External id": 291676,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531385.591, "dur": 0.673, + "args": { + "External id": 291677,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531387.532, "dur": 0.429, + "args": { + "External id": 291678,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531388.817, "dur": 2.696, + "args": { + "External id": 291679,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531392.241, "dur": 0.494, + "args": { + "External id": 291680,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531393.710, "dur": 0.619, + "args": { + "External id": 291681,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367531412.291, "dur": 44.133, + "args": { + "External id": 291682,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367531490.068, "dur": 114.533, + "args": { + "External id": 291683,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367531501.218, "dur": 4.585, + "args": { + "External id": 291684,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367531510.819, "dur": 11.013, + "args": { + "External id": 291685,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367531515.703, "dur": 5.712, + "args": { + "External id": 291686,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531519.255, "dur": 0.954, + "args": { + "External id": 291687,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367531528.219, "dur": 30.140, + "args": { + "External id": 291688,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531529.869, "dur": 2.858, + "args": { + "External id": 291689,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531534.178, "dur": 0.546, + "args": { + "External id": 291690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531535.616, "dur": 0.828, + "args": { + "External id": 291691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531539.785, "dur": 0.641, + "args": { + "External id": 291692,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531541.217, "dur": 0.701, + "args": { + "External id": 291693,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531542.747, "dur": 0.580, + "args": { + "External id": 291694,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531546.652, "dur": 0.454, + "args": { + "External id": 291695,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531548.092, "dur": 0.967, + "args": { + "External id": 291696,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367531550.237, "dur": 3.262, + "args": { + "External id": 291697,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367531573.112, "dur": 23.622, + "args": { + "External id": 291698,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367531698.657, "dur": 301.219, + "args": { + "External id": 291699,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367531730.187, "dur": 265.189, + "args": { + "External id": 291700,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7232, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367531741.004, "dur": 248.846, + "args": { + "External id": 291701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367532024.039, "dur": 2.427, + "args": { + "External id": 291702,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7234, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070554, "tid": 2070554, + "ts": 5333367532113.389, "dur": 17264.483, + "args": { + "External id": 291703,"Record function id": 0, "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532229.144, "dur": 6.690, + "args": { + "External id": 291704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532239.335, "dur": 1.102, + "args": { + "External id": 291705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532242.048, "dur": 1.296, + "args": { + "External id": 291706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532245.013, "dur": 1.338, + "args": { + "External id": 291707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532247.814, "dur": 1.046, + "args": { + "External id": 291708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532250.026, "dur": 0.845, + "args": { + "External id": 291709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532254.167, "dur": 1.077, + "args": { + "External id": 291710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532256.490, "dur": 2.349, + "args": { + "External id": 291711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532260.394, "dur": 0.997, + "args": { + "External id": 291712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367532262.912, "dur": 0.800, + "args": { + "External id": 291713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367532298.670, "dur": 17035.420, + "args": { + "External id": 291714,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367532314.772, "dur": 17011.631, + "args": { + "External id": 291715,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367532335.056, "dur": 13.823, + "args": { + "External id": 291716,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367532352.320, "dur": 16937.691, + "args": { + "External id": 291717,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367532354.943, "dur": 16934.303, + "args": { + "External id": 291718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367532364.566, "dur": 5.540, + "args": { + "External id": 291719,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367532371.887, "dur": 16914.541, + "args": { + "External id": 291720,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367549513.708, "dur": 31.469, + "args": { + "External id": 291721,"Sequence number": 1209219, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7253 + } + }, + { + "ph": "s", "id": 13, "pid": 2070554, "tid": 2070554, "ts": 5333367549513.708, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367549530.764, "dur": 9.547, + "args": { + "External id": 291722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367549534.843, "dur": 5.258, + "args": { + "External id": 291723,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367549608.220, "dur": 138.089, + "args": { + "External id": 291724,"Record function id": 0, "Ev Idx": 7256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367549749.603, "dur": 1146.997, + "args": { + "External id": 291725,"Record function id": 0, "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367549792.179, "dur": 1091.162, + "args": { + "External id": 291726,"Sequence number": 1209220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7258 + } + }, + { + "ph": "s", "id": 12, "pid": 2070554, "tid": 2070554, "ts": 5333367549792.179, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367549866.897, "dur": 50.263, + "args": { + "External id": 291727,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367549934.545, "dur": 107.195, + "args": { + "External id": 291728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367550051.149, "dur": 41.699, + "args": { + "External id": 291729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367550099.984, "dur": 31.327, + "args": { + "External id": 291730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367550160.068, "dur": 46.984, + "args": { + "External id": 291731,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367550226.677, "dur": 19.551, + "args": { + "External id": 291732,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367550262.627, "dur": 135.153, + "args": { + "External id": 291733,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367550315.640, "dur": 13.483, + "args": { + "External id": 291734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367550320.300, "dur": 7.723, + "args": { + "External id": 291735,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367550331.893, "dur": 4.762, + "args": { + "External id": 291736,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367550337.809, "dur": 3.797, + "args": { + "External id": 291737,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367550344.327, "dur": 2.849, + "args": { + "External id": 291738,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367550409.654, "dur": 54.213, + "args": { + "External id": 291739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367550500.555, "dur": 33.429, + "args": { + "External id": 291740,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367550544.656, "dur": 43.119, + "args": { + "External id": 291741,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367550596.003, "dur": 72.263, + "args": { + "External id": 291742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367550695.901, "dur": 28.043, + "args": { + "External id": 291743,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367550733.530, "dur": 41.072, + "args": { + "External id": 291744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367550792.623, "dur": 24.049, + "args": { + "External id": 291745,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.29)", "pid": 2070554, "tid": 2070554, + "ts": 5333367550961.089, "dur": 73.310, + "args": { + "External id": 291746,"Record function id": 0, "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367551107.250, "dur": 46.747, + "args": { + "External id": 291747,"Record function id": 0, "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.30)", "pid": 2070554, "tid": 2070554, + "ts": 5333367551163.779, "dur": 18219.217, + "args": { + "External id": 291748,"Record function id": 0, "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070554, "tid": 2070554, + "ts": 5333367551190.447, "dur": 816.646, + "args": { + "External id": 291749,"Record function id": 0, "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367551275.694, "dur": 9.763, + "args": { + "External id": 291750,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367551299.526, "dur": 35.194, + "args": { + "External id": 291751,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551304.968, "dur": 2.017, + "args": { + "External id": 291752,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551311.364, "dur": 0.659, + "args": { + "External id": 291753,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551313.426, "dur": 0.386, + "args": { + "External id": 291754,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551314.936, "dur": 0.662, + "args": { + "External id": 291755,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551319.045, "dur": 0.408, + "args": { + "External id": 291756,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551320.603, "dur": 0.679, + "args": { + "External id": 291757,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551322.289, "dur": 3.266, + "args": { + "External id": 291758,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551326.891, "dur": 0.616, + "args": { + "External id": 291759,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551328.262, "dur": 0.193, + "args": { + "External id": 291760,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367551346.705, "dur": 40.800, + "args": { + "External id": 291761,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367551421.159, "dur": 106.467, + "args": { + "External id": 291762,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367551431.825, "dur": 3.736, + "args": { + "External id": 291763,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367551440.553, "dur": 10.612, + "args": { + "External id": 291764,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367551445.718, "dur": 4.995, + "args": { + "External id": 291765,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551448.669, "dur": 0.731, + "args": { + "External id": 291766,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367551458.044, "dur": 26.804, + "args": { + "External id": 291767,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551459.769, "dur": 2.894, + "args": { + "External id": 291768,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551463.623, "dur": 0.278, + "args": { + "External id": 291769,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551464.732, "dur": 0.464, + "args": { + "External id": 291770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551468.088, "dur": 0.270, + "args": { + "External id": 291771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551469.495, "dur": 0.540, + "args": { + "External id": 291772,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551470.864, "dur": 0.199, + "args": { + "External id": 291773,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551474.090, "dur": 0.958, + "args": { + "External id": 291774,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551475.957, "dur": 0.814, + "args": { + "External id": 291775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367551477.964, "dur": 2.464, + "args": { + "External id": 291776,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367551497.329, "dur": 22.081, + "args": { + "External id": 291777,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367551580.078, "dur": 333.421, + "args": { + "External id": 291778,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367551610.086, "dur": 298.612, + "args": { + "External id": 291779,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7311, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367551655.899, "dur": 247.125, + "args": { + "External id": 291780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367551938.022, "dur": 2.066, + "args": { + "External id": 291781,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7313, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070554, "tid": 2070554, + "ts": 5333367552030.867, "dur": 17131.726, + "args": { + "External id": 291782,"Record function id": 0, "Ev Idx": 7314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552129.592, "dur": 9.567, + "args": { + "External id": 291783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552142.838, "dur": 1.047, + "args": { + "External id": 291784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552145.537, "dur": 1.014, + "args": { + "External id": 291785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552148.450, "dur": 1.134, + "args": { + "External id": 291786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552151.339, "dur": 1.087, + "args": { + "External id": 291787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552154.028, "dur": 1.047, + "args": { + "External id": 291788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552158.912, "dur": 0.781, + "args": { + "External id": 291789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552161.212, "dur": 2.195, + "args": { + "External id": 291790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552165.196, "dur": 17.411, + "args": { + "External id": 291791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367552186.632, "dur": 1.225, + "args": { + "External id": 291792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367552213.094, "dur": 16904.829, + "args": { + "External id": 291793,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367552228.625, "dur": 16881.063, + "args": { + "External id": 291794,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367552251.786, "dur": 14.318, + "args": { + "External id": 291795,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367552269.485, "dur": 16803.803, + "args": { + "External id": 291796,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367552272.458, "dur": 16800.226, + "args": { + "External id": 291797,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367552278.458, "dur": 5.802, + "args": { + "External id": 291798,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367552286.117, "dur": 16783.272, + "args": { + "External id": 291799,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367569322.887, "dur": 33.216, + "args": { + "External id": 291800,"Sequence number": 1209221, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7332 + } + }, + { + "ph": "s", "id": 11, "pid": 2070554, "tid": 2070554, "ts": 5333367569322.887, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367569340.676, "dur": 10.304, + "args": { + "External id": 291801,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367569344.832, "dur": 5.848, + "args": { + "External id": 291802,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367569422.474, "dur": 94.109, + "args": { + "External id": 291803,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367569518.325, "dur": 1134.231, + "args": { + "External id": 291804,"Record function id": 0, "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367569559.468, "dur": 1047.853, + "args": { + "External id": 291805,"Sequence number": 1209222, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7337 + } + }, + { + "ph": "s", "id": 10, "pid": 2070554, "tid": 2070554, "ts": 5333367569559.468, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367569659.576, "dur": 52.494, + "args": { + "External id": 291806,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367569728.246, "dur": 107.042, + "args": { + "External id": 291807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367569845.053, "dur": 41.541, + "args": { + "External id": 291808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367569896.554, "dur": 31.829, + "args": { + "External id": 291809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367569954.032, "dur": 25.789, + "args": { + "External id": 291810,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367569998.484, "dur": 16.518, + "args": { + "External id": 291811,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367570032.426, "dur": 127.745, + "args": { + "External id": 291812,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367570082.843, "dur": 12.000, + "args": { + "External id": 291813,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367570087.708, "dur": 6.191, + "args": { + "External id": 291814,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367570097.481, "dur": 4.390, + "args": { + "External id": 291815,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367570102.999, "dur": 1.552, + "args": { + "External id": 291816,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367570106.898, "dur": 3.177, + "args": { + "External id": 291817,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367570188.796, "dur": 52.011, + "args": { + "External id": 291818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367570274.076, "dur": 31.177, + "args": { + "External id": 291819,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367570314.675, "dur": 42.882, + "args": { + "External id": 291820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367570367.498, "dur": 35.594, + "args": { + "External id": 291821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367570426.071, "dur": 28.281, + "args": { + "External id": 291822,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367570460.426, "dur": 34.868, + "args": { + "External id": 291823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367570515.809, "dur": 19.226, + "args": { + "External id": 291824,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.30)", "pid": 2070554, "tid": 2070554, + "ts": 5333367570722.380, "dur": 76.599, + "args": { + "External id": 291825,"Record function id": 0, "Ev Idx": 7357 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070554, "tid": 2070554, + "ts": 5333367570872.839, "dur": 45.982, + "args": { + "External id": 291826,"Record function id": 0, "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.31)", "pid": 2070554, "tid": 2070554, + "ts": 5333367570928.394, "dur": 18412.758, + "args": { + "External id": 291827,"Record function id": 0, "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.31)", "pid": 2070554, "tid": 2070554, + "ts": 5333367570937.050, "dur": 887.677, + "args": { + "External id": 291828,"Record function id": 0, "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367571025.842, "dur": 8.979, + "args": { + "External id": 291829,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367571048.443, "dur": 36.953, + "args": { + "External id": 291830,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571054.113, "dur": 2.453, + "args": { + "External id": 291831,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571060.705, "dur": 0.514, + "args": { + "External id": 291832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571062.519, "dur": 0.928, + "args": { + "External id": 291833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571064.519, "dur": 0.720, + "args": { + "External id": 291834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571068.504, "dur": 0.907, + "args": { + "External id": 291835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571070.260, "dur": 1.111, + "args": { + "External id": 291836,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571072.569, "dur": 2.950, + "args": { + "External id": 291837,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571076.853, "dur": 1.135, + "args": { + "External id": 291838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571078.789, "dur": 0.833, + "args": { + "External id": 291839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367571096.790, "dur": 43.029, + "args": { + "External id": 291840,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070554, "tid": 2070554, + "ts": 5333367571207.582, "dur": 120.968, + "args": { + "External id": 291841,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "7", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367571220.634, "dur": 7.643, + "args": { + "External id": 291842,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070554, "tid": 2070554, + "ts": 5333367571233.735, "dur": 10.898, + "args": { + "External id": 291843,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367571238.094, "dur": 5.915, + "args": { + "External id": 291844,"Record function id": 0, "Concrete Inputs": ["", "0", "44961280", "51384320", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571241.142, "dur": 0.928, + "args": { + "External id": 291845,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070554, "tid": 2070554, + "ts": 5333367571252.711, "dur": 25.242, + "args": { + "External id": 291846,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571254.838, "dur": 0.402, + "args": { + "External id": 291847,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "44961280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571256.202, "dur": 2.727, + "args": { + "External id": 291848,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "44961536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571260.342, "dur": 0.533, + "args": { + "External id": 291849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45485824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571262.055, "dur": 0.581, + "args": { + "External id": 291850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46010112"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571265.799, "dur": 0.563, + "args": { + "External id": 291851,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46534400"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571266.946, "dur": 0.787, + "args": { + "External id": 291852,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "47058688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571268.645, "dur": 0.631, + "args": { + "External id": 291853,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "47058944"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571271.806, "dur": 0.650, + "args": { + "External id": 291854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "48500736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367571273.364, "dur": 0.616, + "args": { + "External id": 291855,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "49942528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367571293.720, "dur": 26.277, + "args": { + "External id": 291856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070554, "tid": 2070554, + "ts": 5333367571385.325, "dur": 343.630, + "args": { + "External id": 291857,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367571414.526, "dur": 309.190, + "args": { + "External id": 291858,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 7, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7390, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070554, "tid": 2070554, + "ts": 5333367571424.996, "dur": 291.989, + "args": { + "External id": 291859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333367571752.807, "dur": 3.020, + "args": { + "External id": 291860,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7392, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.31)", "pid": 2070554, "tid": 2070554, + "ts": 5333367571846.584, "dur": 17271.957, + "args": { + "External id": 291861,"Record function id": 0, "Ev Idx": 7393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571948.422, "dur": 9.786, + "args": { + "External id": 291862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571961.828, "dur": 1.388, + "args": { + "External id": 291863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571964.884, "dur": 1.083, + "args": { + "External id": 291864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571967.667, "dur": 1.066, + "args": { + "External id": 291865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571970.081, "dur": 1.194, + "args": { + "External id": 291866,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571972.707, "dur": 0.844, + "args": { + "External id": 291867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571977.280, "dur": 1.068, + "args": { + "External id": 291868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571979.887, "dur": 1.932, + "args": { + "External id": 291869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571983.456, "dur": 1.336, + "args": { + "External id": 291870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367571986.247, "dur": 0.688, + "args": { + "External id": 291871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367572007.424, "dur": 17064.785, + "args": { + "External id": 291872,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367572027.201, "dur": 17037.240, + "args": { + "External id": 291873,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367572049.424, "dur": 14.734, + "args": { + "External id": 291874,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367572067.618, "dur": 16959.022, + "args": { + "External id": 291875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367572070.104, "dur": 16955.860, + "args": { + "External id": 291876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367572076.354, "dur": 5.559, + "args": { + "External id": 291877,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367572086.789, "dur": 16935.556, + "args": { + "External id": 291878,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367589276.653, "dur": 37.965, + "args": { + "External id": 291879,"Sequence number": 1209223, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7411 + } + }, + { + "ph": "s", "id": 9, "pid": 2070554, "tid": 2070554, "ts": 5333367589276.653, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333367589299.532, "dur": 9.993, + "args": { + "External id": 291880,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367589303.300, "dur": 5.801, + "args": { + "External id": 291881,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367589381.571, "dur": 95.058, + "args": { + "External id": 291882,"Record function id": 0, "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070554, "tid": 2070554, + "ts": 5333367589478.584, "dur": 1117.618, + "args": { + "External id": 291883,"Record function id": 0, "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367589521.435, "dur": 1060.734, + "args": { + "External id": 291884,"Sequence number": 1209224, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7416 + } + }, + { + "ph": "s", "id": 8, "pid": 2070554, "tid": 2070554, "ts": 5333367589521.435, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367589589.453, "dur": 85.957, + "args": { + "External id": 291885,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367589692.946, "dur": 109.291, + "args": { + "External id": 291886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367589814.301, "dur": 38.297, + "args": { + "External id": 291887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367589859.556, "dur": 30.898, + "args": { + "External id": 291888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367589926.002, "dur": 31.921, + "args": { + "External id": 291889,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367589973.826, "dur": 15.515, + "args": { + "External id": 291890,"kernel_hash": "cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ld/cldqui4codyczhkdrz3ctt5pogllxs6yduwk43jmvbof3ofxmkjv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367590007.459, "dur": 131.752, + "args": { + "External id": 291891,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367590059.462, "dur": 11.541, + "args": { + "External id": 291892,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367590064.452, "dur": 5.743, + "args": { + "External id": 291893,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367590073.654, "dur": 4.642, + "args": { + "External id": 291894,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367590079.482, "dur": 1.426, + "args": { + "External id": 291895,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367590086.013, "dur": 3.116, + "args": { + "External id": 291896,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367590149.052, "dur": 66.156, + "args": { + "External id": 291897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070554, "tid": 2070554, + "ts": 5333367590250.640, "dur": 31.341, + "args": { + "External id": 291898,"kernel_hash": "cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/of/cofo2xf3bigdfueamjfcjan5mqabwcl7bhbdhlz6jilhn2xeraxt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367590290.344, "dur": 45.812, + "args": { + "External id": 291899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367590343.093, "dur": 35.434, + "args": { + "External id": 291900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367590402.019, "dur": 32.081, + "args": { + "External id": 291901,"kernel_hash": "c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/4q/c4qey7euxloybjhoogrguqzxinnks6rjj7vhn5aetaz3osquskbk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367590439.755, "dur": 35.258, + "args": { + "External id": 291902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070554, "tid": 2070554, + "ts": 5333367590492.361, "dur": 19.407, + "args": { + "External id": 291903,"kernel_hash": "cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyz3pl7hmgkcq4azweuf2wmz7unjnnxmcmzlxe36c4wfauvm33mj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.31)", "pid": 2070554, "tid": 2070554, + "ts": 5333367590713.771, "dur": 34.111, + "args": { + "External id": 291904,"Record function id": 0, "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070554, "tid": 2070554, + "ts": 5333367590818.853, "dur": 38.844, + "args": { + "External id": 291905,"Record function id": 0, "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2070554, "tid": 2070554, + "ts": 5333367590858.846, "dur": 188.469, + "args": { + "External id": 291906,"Record function id": 0, "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367590892.620, "dur": 146.931, + "args": { + "External id": 291907,"Sequence number": 1209225, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [8388608, 2048, 1]], "Input Dims": [[2048], [16, 4096, 2048]], "Ev Idx": 7439 + } + }, + { + "ph": "s", "id": 7, "pid": 2070554, "tid": 2070554, "ts": 5333367590892.620, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070554, "tid": 2070554, + "ts": 5333367590959.444, "dur": 43.820, + "args": { + "External id": 291908,"kernel_hash": "cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/qp/cqpt7mwgy5yqmh72goig5li3mr2l2sinhq27eouv3cyvn3ym4dh5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367591115.433, "dur": 0.934, + "args": { + "External id": 291909,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367591125.345, "dur": 12.226, + "args": { + "External id": 291910,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591132.201, "dur": 2.371, + "args": { + "External id": 291911,"Record function id": 0, "Concrete Inputs": ["", "[16, 8191]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367591142.138, "dur": 3.523, + "args": { + "External id": 291912,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591143.784, "dur": 1.036, + "args": { + "External id": 291913,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367591146.881, "dur": 3.373, + "args": { + "External id": 291914,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591148.338, "dur": 0.847, + "args": { + "External id": 291915,"Record function id": 0, "Concrete Inputs": ["", "[16, 1]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::full_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367591158.414, "dur": 66.945, + "args": { + "External id": 291916,"Record function id": 0, "Concrete Inputs": ["", "-100", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367591160.691, "dur": 28.869, + "args": { + "External id": 291917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["long int", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591181.891, "dur": 6.913, + "args": { + "External id": 291918,"Record function id": 0, "Concrete Inputs": ["[16, 1]", "[1, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591191.772, "dur": 33.147, + "args": { + "External id": 291919,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1, 1], []], "Input Dims": [[16, 1], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070554, "tid": 2070554, + "ts": 5333367591235.574, "dur": 33.035, + "args": { + "External id": 291920,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8192, 1], [1, 1]], []], "Input Dims": [[[16, 8191], [16, 1]], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367591275.972, "dur": 4.036, + "args": { + "External id": 291921,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591277.846, "dur": 1.108, + "args": { + "External id": 291922,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070554, "tid": 2070554, + "ts": 5333367591285.221, "dur": 41.551, + "args": { + "External id": 291923,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070554, "tid": 2070554, + "ts": 5333367591287.680, "dur": 38.908, + "args": { + "External id": 291924,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367591290.006, "dur": 8.416, + "args": { + "External id": 291925,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367591294.463, "dur": 3.578, + "args": { + "External id": 291926,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591299.708, "dur": 26.425, + "args": { + "External id": 291927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 7459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367591353.364, "dur": 7.556, + "args": { + "External id": 291928,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7460 + } + }, + { + "ph": "s", "id": 6, "pid": 2070554, "tid": 2070554, "ts": 5333367591353.364, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367591363.383, "dur": 1.219, + "args": { + "External id": 291929,"Sequence number": 1209227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367591392.718, "dur": 19113.852, + "args": { + "External id": 291930,"Sequence number": 1209227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 7462 + } + }, + { + "ph": "s", "id": 5, "pid": 2070554, "tid": 2070554, "ts": 5333367591392.718, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367591424.421, "dur": 33.356, + "args": { + "External id": 291931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367591425.780, "dur": 8.758, + "args": { + "External id": 291932,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591427.572, "dur": 6.597, + "args": { + "External id": 291933,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591437.011, "dur": 20.469, + "args": { + "External id": 291934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591438.067, "dur": 18.582, + "args": { + "External id": 291935,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367591461.056, "dur": 21.997, + "args": { + "External id": 291936,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367591461.823, "dur": 4.126, + "args": { + "External id": 291937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591462.936, "dur": 2.730, + "args": { + "External id": 291938,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591466.515, "dur": 16.347, + "args": { + "External id": 291939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591469.358, "dur": 13.151, + "args": { + "External id": 291940,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070554, "tid": 2070554, + "ts": 5333367591490.371, "dur": 17.975, + "args": { + "External id": 291941,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367591491.942, "dur": 3.280, + "args": { + "External id": 291942,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591495.735, "dur": 12.345, + "args": { + "External id": 291943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591496.421, "dur": 11.310, + "args": { + "External id": 291944,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070554, "tid": 2070554, + "ts": 5333367591516.069, "dur": 28.094, + "args": { + "External id": 291945,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367591548.690, "dur": 63.093, + "args": { + "External id": 291946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367591552.172, "dur": 59.205, + "args": { + "External id": 291947,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591560.084, "dur": 1.021, + "args": { + "External id": 291948,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367591562.608, "dur": 28.453, + "args": { + "External id": 291949,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367591566.536, "dur": 24.206, + "args": { + "External id": 291950,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367591569.087, "dur": 3.038, + "args": { + "External id": 291951,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367591572.953, "dur": 17.422, + "args": { + "External id": 291952,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070554, "tid": 2070554, + "ts": 5333367591616.991, "dur": 12778.487, + "args": { + "External id": 291953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070554, "tid": 2070554, + "ts": 5333367591653.524, "dur": 12740.869, + "args": { + "External id": 291954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367604405.539, "dur": 7.163, + "args": { + "External id": 291955,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367604409.543, "dur": 0.973, + "args": { + "External id": 291956,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367604419.829, "dur": 119.759, + "args": { + "External id": 291957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367604425.588, "dur": 6.645, + "args": { + "External id": 291958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367604428.635, "dur": 2.771, + "args": { + "External id": 291959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367604430.453, "dur": 0.664, + "args": { + "External id": 291960,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367604434.457, "dur": 104.507, + "args": { + "External id": 291961,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367604437.755, "dur": 100.135, + "args": { + "External id": 291962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367604543.565, "dur": 4.371, + "args": { + "External id": 291963,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367604545.452, "dur": 1.247, + "args": { + "External id": 291964,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367604558.091, "dur": 2.516, + "args": { + "External id": 291965,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367604570.282, "dur": 6.838, + "args": { + "External id": 291966,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367604572.674, "dur": 4.177, + "args": { + "External id": 291967,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367604792.533, "dur": 251.763, + "args": { + "External id": 291968,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367604797.810, "dur": 3.496, + "args": { + "External id": 291969,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367604803.830, "dur": 239.922, + "args": { + "External id": 291970,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367604810.106, "dur": 0.606, + "args": { + "External id": 291971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367604813.232, "dur": 30.995, + "args": { + "External id": 291972,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367604847.067, "dur": 5.738, + "args": { + "External id": 291973,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367604851.224, "dur": 1.066, + "args": { + "External id": 291974,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367604854.369, "dur": 29.724, + "args": { + "External id": 291975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367604855.668, "dur": 1.406, + "args": { + "External id": 291976,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367604859.608, "dur": 24.086, + "args": { + "External id": 291977,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367604863.640, "dur": 3.237, + "args": { + "External id": 291978,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367604886.537, "dur": 27.136, + "args": { + "External id": 291979,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367604916.777, "dur": 19.462, + "args": { + "External id": 291980,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367604943.066, "dur": 18.544, + "args": { + "External id": 291981,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367604964.315, "dur": 16.466, + "args": { + "External id": 291982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367604983.527, "dur": 23.877, + "args": { + "External id": 291983,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367604985.793, "dur": 1.438, + "args": { + "External id": 291984,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367604989.529, "dur": 0.862, + "args": { + "External id": 291985,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605009.542, "dur": 15.719, + "args": { + "External id": 291986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605030.275, "dur": 11.973, + "args": { + "External id": 291987,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367605052.571, "dur": 2.341, + "args": { + "External id": 291988,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367605062.684, "dur": 4.406, + "args": { + "External id": 291989,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605064.985, "dur": 1.016, + "args": { + "External id": 291990,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367605157.816, "dur": 94.572, + "args": { + "External id": 291991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367605260.436, "dur": 6.277, + "args": { + "External id": 291992,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605263.597, "dur": 1.156, + "args": { + "External id": 291993,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605268.462, "dur": 33.446, + "args": { + "External id": 291994,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367605312.640, "dur": 7.121, + "args": { + "External id": 291995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367605314.884, "dur": 4.154, + "args": { + "External id": 291996,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605316.990, "dur": 1.704, + "args": { + "External id": 291997,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367605323.945, "dur": 50.763, + "args": { + "External id": 291998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367605325.278, "dur": 48.597, + "args": { + "External id": 291999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605380.087, "dur": 19.834, + "args": { + "External id": 292000,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367605406.553, "dur": 6.986, + "args": { + "External id": 292001,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605411.009, "dur": 0.965, + "args": { + "External id": 292002,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367605418.567, "dur": 52.375, + "args": { + "External id": 292003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367605419.418, "dur": 4.042, + "args": { + "External id": 292004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367605420.362, "dur": 2.538, + "args": { + "External id": 292005,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605421.372, "dur": 1.366, + "args": { + "External id": 292006,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367605424.109, "dur": 46.398, + "args": { + "External id": 292007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367605427.155, "dur": 42.653, + "args": { + "External id": 292008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367605475.329, "dur": 5.659, + "args": { + "External id": 292009,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605477.122, "dur": 3.032, + "args": { + "External id": 292010,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367605487.937, "dur": 2.153, + "args": { + "External id": 292011,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367605498.687, "dur": 8.691, + "args": { + "External id": 292012,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367605502.981, "dur": 4.102, + "args": { + "External id": 292013,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367605606.302, "dur": 242.159, + "args": { + "External id": 292014,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367605608.205, "dur": 2.185, + "args": { + "External id": 292015,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367605611.934, "dur": 236.046, + "args": { + "External id": 292016,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367605613.633, "dur": 0.393, + "args": { + "External id": 292017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367605615.012, "dur": 63.170, + "args": { + "External id": 292018,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367605681.394, "dur": 3.318, + "args": { + "External id": 292019,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605683.270, "dur": 0.881, + "args": { + "External id": 292020,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367605685.769, "dur": 30.248, + "args": { + "External id": 292021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367605687.277, "dur": 2.020, + "args": { + "External id": 292022,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367605693.193, "dur": 22.538, + "args": { + "External id": 292023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605698.599, "dur": 3.101, + "args": { + "External id": 292024,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367605717.825, "dur": 23.386, + "args": { + "External id": 292025,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605743.212, "dur": 15.517, + "args": { + "External id": 292026,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367605761.819, "dur": 15.671, + "args": { + "External id": 292027,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605779.155, "dur": 14.874, + "args": { + "External id": 292028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367605795.677, "dur": 23.775, + "args": { + "External id": 292029,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605797.519, "dur": 1.614, + "args": { + "External id": 292030,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605803.487, "dur": 0.761, + "args": { + "External id": 292031,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605820.939, "dur": 13.207, + "args": { + "External id": 292032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367605835.334, "dur": 11.587, + "args": { + "External id": 292033,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367605856.277, "dur": 3.896, + "args": { + "External id": 292034,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367605870.249, "dur": 3.765, + "args": { + "External id": 292035,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367605872.675, "dur": 0.517, + "args": { + "External id": 292036,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367605950.909, "dur": 62.606, + "args": { + "External id": 292037,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367606018.786, "dur": 7.448, + "args": { + "External id": 292038,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606023.468, "dur": 1.657, + "args": { + "External id": 292039,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606027.823, "dur": 29.967, + "args": { + "External id": 292040,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367606062.747, "dur": 5.457, + "args": { + "External id": 292041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367606064.254, "dur": 3.117, + "args": { + "External id": 292042,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606066.235, "dur": 0.961, + "args": { + "External id": 292043,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367606070.850, "dur": 44.103, + "args": { + "External id": 292044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367606074.166, "dur": 40.062, + "args": { + "External id": 292045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606119.205, "dur": 16.423, + "args": { + "External id": 292046,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367606141.082, "dur": 3.646, + "args": { + "External id": 292047,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606142.944, "dur": 0.732, + "args": { + "External id": 292048,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367606149.004, "dur": 73.349, + "args": { + "External id": 292049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367606149.926, "dur": 8.757, + "args": { + "External id": 292050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367606150.799, "dur": 7.317, + "args": { + "External id": 292051,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606157.300, "dur": 0.718, + "args": { + "External id": 292052,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367606159.343, "dur": 62.385, + "args": { + "External id": 292053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367606159.959, "dur": 60.912, + "args": { + "External id": 292054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367606229.005, "dur": 4.447, + "args": { + "External id": 292055,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606231.169, "dur": 1.129, + "args": { + "External id": 292056,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367606240.160, "dur": 1.683, + "args": { + "External id": 292057,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367606249.705, "dur": 8.094, + "args": { + "External id": 292058,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367606253.539, "dur": 4.019, + "args": { + "External id": 292059,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367606354.010, "dur": 238.393, + "args": { + "External id": 292060,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367606356.401, "dur": 2.150, + "args": { + "External id": 292061,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367606360.198, "dur": 231.732, + "args": { + "External id": 292062,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367606364.110, "dur": 0.242, + "args": { + "External id": 292063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367606365.405, "dur": 33.258, + "args": { + "External id": 292064,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367606400.459, "dur": 3.648, + "args": { + "External id": 292065,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606402.720, "dur": 1.132, + "args": { + "External id": 292066,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367606405.240, "dur": 32.116, + "args": { + "External id": 292067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367606410.468, "dur": 1.670, + "args": { + "External id": 292068,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367606413.436, "dur": 23.593, + "args": { + "External id": 292069,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606415.801, "dur": 2.291, + "args": { + "External id": 292070,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367606438.605, "dur": 31.939, + "args": { + "External id": 292071,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606472.431, "dur": 26.273, + "args": { + "External id": 292072,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367606501.243, "dur": 20.232, + "args": { + "External id": 292073,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606522.925, "dur": 14.769, + "args": { + "External id": 292074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367606539.641, "dur": 23.687, + "args": { + "External id": 292075,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606541.535, "dur": 1.318, + "args": { + "External id": 292076,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606546.811, "dur": 0.666, + "args": { + "External id": 292077,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606564.647, "dur": 13.366, + "args": { + "External id": 292078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606579.316, "dur": 11.567, + "args": { + "External id": 292079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367606598.910, "dur": 1.628, + "args": { + "External id": 292080,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367606608.562, "dur": 3.196, + "args": { + "External id": 292081,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606610.576, "dur": 0.498, + "args": { + "External id": 292082,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367606738.690, "dur": 62.283, + "args": { + "External id": 292083,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367606808.687, "dur": 8.309, + "args": { + "External id": 292084,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606811.710, "dur": 3.504, + "args": { + "External id": 292085,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606818.553, "dur": 27.286, + "args": { + "External id": 292086,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367606851.156, "dur": 5.280, + "args": { + "External id": 292087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367606852.757, "dur": 3.044, + "args": { + "External id": 292088,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606854.230, "dur": 1.341, + "args": { + "External id": 292089,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367606861.710, "dur": 43.731, + "args": { + "External id": 292090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367606862.982, "dur": 41.686, + "args": { + "External id": 292091,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367606909.371, "dur": 17.076, + "args": { + "External id": 292092,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367606931.925, "dur": 4.196, + "args": { + "External id": 292093,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606933.858, "dur": 1.081, + "args": { + "External id": 292094,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367606953.605, "dur": 50.677, + "args": { + "External id": 292095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367606954.717, "dur": 5.605, + "args": { + "External id": 292096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367606957.394, "dur": 2.302, + "args": { + "External id": 292097,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367606958.639, "dur": 0.922, + "args": { + "External id": 292098,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367606961.255, "dur": 42.646, + "args": { + "External id": 292099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367606962.090, "dur": 41.256, + "args": { + "External id": 292100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367607008.260, "dur": 3.640, + "args": { + "External id": 292101,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607009.931, "dur": 0.987, + "args": { + "External id": 292102,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367607020.408, "dur": 1.840, + "args": { + "External id": 292103,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607030.474, "dur": 8.632, + "args": { + "External id": 292104,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607032.519, "dur": 6.294, + "args": { + "External id": 292105,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367607122.507, "dur": 218.278, + "args": { + "External id": 292106,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607124.810, "dur": 1.897, + "args": { + "External id": 292107,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367607128.413, "dur": 212.012, + "args": { + "External id": 292108,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367607130.303, "dur": 0.269, + "args": { + "External id": 292109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367607133.862, "dur": 26.520, + "args": { + "External id": 292110,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367607162.076, "dur": 3.165, + "args": { + "External id": 292111,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607163.996, "dur": 0.872, + "args": { + "External id": 292112,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367607182.156, "dur": 29.951, + "args": { + "External id": 292113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607183.634, "dur": 2.738, + "args": { + "External id": 292114,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367607188.042, "dur": 23.718, + "args": { + "External id": 292115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607193.775, "dur": 2.780, + "args": { + "External id": 292116,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367607213.567, "dur": 22.806, + "args": { + "External id": 292117,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607238.162, "dur": 13.982, + "args": { + "External id": 292118,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367607254.807, "dur": 15.370, + "args": { + "External id": 292119,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607273.862, "dur": 13.733, + "args": { + "External id": 292120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367607289.283, "dur": 22.921, + "args": { + "External id": 292121,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607291.199, "dur": 1.416, + "args": { + "External id": 292122,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607294.943, "dur": 1.193, + "args": { + "External id": 292123,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607313.573, "dur": 13.153, + "args": { + "External id": 292124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607327.857, "dur": 11.499, + "args": { + "External id": 292125,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367607350.351, "dur": 2.467, + "args": { + "External id": 292126,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367607362.398, "dur": 3.530, + "args": { + "External id": 292127,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607364.625, "dur": 0.431, + "args": { + "External id": 292128,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367607434.388, "dur": 55.485, + "args": { + "External id": 292129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367607495.202, "dur": 4.579, + "args": { + "External id": 292130,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607497.534, "dur": 1.287, + "args": { + "External id": 292131,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607501.267, "dur": 23.860, + "args": { + "External id": 292132,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367607529.646, "dur": 7.793, + "args": { + "External id": 292133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367607534.026, "dur": 2.721, + "args": { + "External id": 292134,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607535.703, "dur": 0.877, + "args": { + "External id": 292135,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367607540.367, "dur": 40.387, + "args": { + "External id": 292136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367607541.345, "dur": 38.808, + "args": { + "External id": 292137,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607584.181, "dur": 15.574, + "args": { + "External id": 292138,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367607609.088, "dur": 6.939, + "args": { + "External id": 292139,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607613.984, "dur": 0.959, + "args": { + "External id": 292140,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367607668.191, "dur": 60.387, + "args": { + "External id": 292141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367607669.237, "dur": 6.966, + "args": { + "External id": 292142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367607670.548, "dur": 4.825, + "args": { + "External id": 292143,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607671.876, "dur": 3.007, + "args": { + "External id": 292144,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367607677.079, "dur": 51.048, + "args": { + "External id": 292145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367607677.895, "dur": 49.615, + "args": { + "External id": 292146,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367607734.129, "dur": 4.000, + "args": { + "External id": 292147,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607736.084, "dur": 1.114, + "args": { + "External id": 292148,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367607747.032, "dur": 1.824, + "args": { + "External id": 292149,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607756.855, "dur": 5.869, + "args": { + "External id": 292150,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607758.706, "dur": 3.726, + "args": { + "External id": 292151,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367607852.241, "dur": 201.090, + "args": { + "External id": 292152,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607854.858, "dur": 1.942, + "args": { + "External id": 292153,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367607860.241, "dur": 192.709, + "args": { + "External id": 292154,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367607861.506, "dur": 0.429, + "args": { + "External id": 292155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367607866.127, "dur": 24.004, + "args": { + "External id": 292156,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367607891.779, "dur": 3.412, + "args": { + "External id": 292157,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367607893.935, "dur": 0.758, + "args": { + "External id": 292158,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367607896.126, "dur": 28.808, + "args": { + "External id": 292159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367607897.193, "dur": 3.626, + "args": { + "External id": 292160,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367607902.182, "dur": 22.497, + "args": { + "External id": 292161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607905.171, "dur": 2.423, + "args": { + "External id": 292162,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367607926.247, "dur": 20.818, + "args": { + "External id": 292163,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607948.637, "dur": 16.046, + "args": { + "External id": 292164,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367607969.596, "dur": 14.277, + "args": { + "External id": 292165,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367607985.416, "dur": 13.527, + "args": { + "External id": 292166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367608000.732, "dur": 21.308, + "args": { + "External id": 292167,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608002.542, "dur": 1.343, + "args": { + "External id": 292168,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608005.844, "dur": 0.958, + "args": { + "External id": 292169,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608023.444, "dur": 13.065, + "args": { + "External id": 292170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608040.079, "dur": 11.867, + "args": { + "External id": 292171,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367608059.771, "dur": 1.820, + "args": { + "External id": 292172,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367608070.588, "dur": 3.023, + "args": { + "External id": 292173,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608072.397, "dur": 0.516, + "args": { + "External id": 292174,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367608141.629, "dur": 77.272, + "args": { + "External id": 292175,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367608225.717, "dur": 6.169, + "args": { + "External id": 292176,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608228.718, "dur": 1.560, + "args": { + "External id": 292177,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608233.432, "dur": 28.859, + "args": { + "External id": 292178,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367608269.864, "dur": 8.007, + "args": { + "External id": 292179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367608271.824, "dur": 5.276, + "args": { + "External id": 292180,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608273.621, "dur": 3.213, + "args": { + "External id": 292181,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367608280.860, "dur": 46.374, + "args": { + "External id": 292182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367608281.877, "dur": 44.781, + "args": { + "External id": 292183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608331.571, "dur": 15.853, + "args": { + "External id": 292184,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367608353.453, "dur": 6.044, + "args": { + "External id": 292185,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608357.759, "dur": 0.564, + "args": { + "External id": 292186,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367608363.916, "dur": 50.502, + "args": { + "External id": 292187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367608364.955, "dur": 3.886, + "args": { + "External id": 292188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367608365.919, "dur": 2.366, + "args": { + "External id": 292189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608367.298, "dur": 0.839, + "args": { + "External id": 292190,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367608369.692, "dur": 44.395, + "args": { + "External id": 292191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367608372.019, "dur": 41.542, + "args": { + "External id": 292192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367608418.715, "dur": 3.128, + "args": { + "External id": 292193,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608420.181, "dur": 0.732, + "args": { + "External id": 292194,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367608427.932, "dur": 1.855, + "args": { + "External id": 292195,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367608437.891, "dur": 8.630, + "args": { + "External id": 292196,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367608442.359, "dur": 3.897, + "args": { + "External id": 292197,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367608530.259, "dur": 250.632, + "args": { + "External id": 292198,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367608532.402, "dur": 2.060, + "args": { + "External id": 292199,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367608538.536, "dur": 241.887, + "args": { + "External id": 292200,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367608539.810, "dur": 0.243, + "args": { + "External id": 292201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367608541.126, "dur": 23.219, + "args": { + "External id": 292202,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367608565.855, "dur": 4.652, + "args": { + "External id": 292203,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608567.651, "dur": 2.559, + "args": { + "External id": 292204,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367608571.844, "dur": 24.031, + "args": { + "External id": 292205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367608572.970, "dur": 1.714, + "args": { + "External id": 292206,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367608578.199, "dur": 17.349, + "args": { + "External id": 292207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608580.651, "dur": 2.289, + "args": { + "External id": 292208,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367608597.244, "dur": 70.282, + "args": { + "External id": 292209,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608670.222, "dur": 15.768, + "args": { + "External id": 292210,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367608689.110, "dur": 17.455, + "args": { + "External id": 292211,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608707.923, "dur": 14.190, + "args": { + "External id": 292212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367608723.941, "dur": 27.075, + "args": { + "External id": 292213,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608725.993, "dur": 1.660, + "args": { + "External id": 292214,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608731.767, "dur": 3.333, + "args": { + "External id": 292215,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608752.545, "dur": 13.495, + "args": { + "External id": 292216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608767.411, "dur": 12.113, + "args": { + "External id": 292217,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367608789.047, "dur": 2.464, + "args": { + "External id": 292218,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367608801.331, "dur": 3.468, + "args": { + "External id": 292219,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608803.231, "dur": 0.665, + "args": { + "External id": 292220,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367608874.493, "dur": 59.166, + "args": { + "External id": 292221,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367608938.507, "dur": 6.679, + "args": { + "External id": 292222,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608943.329, "dur": 0.901, + "args": { + "External id": 292223,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367608946.596, "dur": 26.853, + "args": { + "External id": 292224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367608978.124, "dur": 5.020, + "args": { + "External id": 292225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367608979.331, "dur": 3.177, + "args": { + "External id": 292226,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367608980.926, "dur": 1.377, + "args": { + "External id": 292227,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367608985.938, "dur": 44.143, + "args": { + "External id": 292228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367608989.243, "dur": 40.019, + "args": { + "External id": 292229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609033.930, "dur": 15.635, + "args": { + "External id": 292230,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367609055.460, "dur": 4.197, + "args": { + "External id": 292231,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609057.216, "dur": 1.443, + "args": { + "External id": 292232,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367609063.774, "dur": 49.746, + "args": { + "External id": 292233,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367609064.648, "dur": 5.308, + "args": { + "External id": 292234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367609065.684, "dur": 3.747, + "args": { + "External id": 292235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609068.581, "dur": 0.728, + "args": { + "External id": 292236,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367609070.646, "dur": 42.529, + "args": { + "External id": 292237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367609071.236, "dur": 41.227, + "args": { + "External id": 292238,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367609117.733, "dur": 3.295, + "args": { + "External id": 292239,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609119.393, "dur": 0.723, + "args": { + "External id": 292240,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367609126.133, "dur": 1.696, + "args": { + "External id": 292241,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609135.521, "dur": 7.967, + "args": { + "External id": 292242,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609139.322, "dur": 3.875, + "args": { + "External id": 292243,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367609243.552, "dur": 193.985, + "args": { + "External id": 292244,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609247.969, "dur": 3.156, + "args": { + "External id": 292245,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367609252.542, "dur": 184.545, + "args": { + "External id": 292246,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367609253.842, "dur": 0.270, + "args": { + "External id": 292247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367609255.085, "dur": 22.231, + "args": { + "External id": 292248,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367609278.555, "dur": 5.923, + "args": { + "External id": 292249,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609283.334, "dur": 0.754, + "args": { + "External id": 292250,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367609285.451, "dur": 24.580, + "args": { + "External id": 292251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609288.417, "dur": 1.644, + "args": { + "External id": 292252,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367609291.449, "dur": 18.346, + "args": { + "External id": 292253,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609293.922, "dur": 2.472, + "args": { + "External id": 292254,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367609311.356, "dur": 21.306, + "args": { + "External id": 292255,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609333.797, "dur": 14.754, + "args": { + "External id": 292256,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367609351.144, "dur": 13.566, + "args": { + "External id": 292257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609366.021, "dur": 14.382, + "args": { + "External id": 292258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367609382.126, "dur": 23.656, + "args": { + "External id": 292259,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609386.142, "dur": 1.048, + "args": { + "External id": 292260,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609390.950, "dur": 0.818, + "args": { + "External id": 292261,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609407.039, "dur": 16.627, + "args": { + "External id": 292262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609424.635, "dur": 11.435, + "args": { + "External id": 292263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367609443.910, "dur": 2.161, + "args": { + "External id": 292264,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367609455.153, "dur": 3.240, + "args": { + "External id": 292265,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609457.167, "dur": 0.540, + "args": { + "External id": 292266,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367609521.918, "dur": 53.022, + "args": { + "External id": 292267,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367609582.015, "dur": 4.745, + "args": { + "External id": 292268,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609584.631, "dur": 1.085, + "args": { + "External id": 292269,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609588.118, "dur": 23.506, + "args": { + "External id": 292270,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367609615.790, "dur": 41.748, + "args": { + "External id": 292271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367609617.382, "dur": 4.320, + "args": { + "External id": 292272,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609619.595, "dur": 1.930, + "args": { + "External id": 292273,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367609664.433, "dur": 52.355, + "args": { + "External id": 292274,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367609665.614, "dur": 50.588, + "args": { + "External id": 292275,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609721.053, "dur": 17.163, + "args": { + "External id": 292276,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367609744.749, "dur": 4.814, + "args": { + "External id": 292277,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609747.437, "dur": 0.843, + "args": { + "External id": 292278,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367609753.762, "dur": 51.341, + "args": { + "External id": 292279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367609754.838, "dur": 6.483, + "args": { + "External id": 292280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367609758.039, "dur": 2.766, + "args": { + "External id": 292281,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609759.627, "dur": 1.049, + "args": { + "External id": 292282,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367609761.970, "dur": 42.575, + "args": { + "External id": 292283,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367609762.557, "dur": 41.318, + "args": { + "External id": 292284,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367609809.017, "dur": 3.618, + "args": { + "External id": 292285,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609810.999, "dur": 0.679, + "args": { + "External id": 292286,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367609821.564, "dur": 1.600, + "args": { + "External id": 292287,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609830.092, "dur": 6.033, + "args": { + "External id": 292288,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609832.195, "dur": 3.653, + "args": { + "External id": 292289,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367609917.090, "dur": 195.187, + "args": { + "External id": 292290,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609921.982, "dur": 2.103, + "args": { + "External id": 292291,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367609925.426, "dur": 186.275, + "args": { + "External id": 292292,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367609926.749, "dur": 0.488, + "args": { + "External id": 292293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367609930.750, "dur": 22.247, + "args": { + "External id": 292294,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367609954.313, "dur": 6.403, + "args": { + "External id": 292295,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367609959.326, "dur": 0.927, + "args": { + "External id": 292296,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367609961.769, "dur": 22.996, + "args": { + "External id": 292297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367609962.795, "dur": 1.627, + "args": { + "External id": 292298,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367609965.714, "dur": 18.820, + "args": { + "External id": 292299,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367609968.306, "dur": 2.912, + "args": { + "External id": 292300,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367609986.243, "dur": 23.582, + "args": { + "External id": 292301,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610011.043, "dur": 13.147, + "args": { + "External id": 292302,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367610026.868, "dur": 15.087, + "args": { + "External id": 292303,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610045.403, "dur": 13.343, + "args": { + "External id": 292304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367610060.845, "dur": 21.728, + "args": { + "External id": 292305,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610063.487, "dur": 1.233, + "args": { + "External id": 292306,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610066.947, "dur": 1.026, + "args": { + "External id": 292307,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610083.811, "dur": 13.470, + "args": { + "External id": 292308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610098.523, "dur": 12.360, + "args": { + "External id": 292309,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367610120.910, "dur": 1.646, + "args": { + "External id": 292310,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367610130.042, "dur": 3.660, + "args": { + "External id": 292311,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610132.279, "dur": 0.562, + "args": { + "External id": 292312,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367610211.862, "dur": 55.995, + "args": { + "External id": 292313,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367610273.323, "dur": 5.640, + "args": { + "External id": 292314,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610276.319, "dur": 1.323, + "args": { + "External id": 292315,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610280.181, "dur": 27.163, + "args": { + "External id": 292316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367610311.712, "dur": 8.433, + "args": { + "External id": 292317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367610315.842, "dur": 3.683, + "args": { + "External id": 292318,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610317.999, "dur": 1.346, + "args": { + "External id": 292319,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367610322.655, "dur": 43.820, + "args": { + "External id": 292320,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367610323.962, "dur": 41.796, + "args": { + "External id": 292321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610370.257, "dur": 16.229, + "args": { + "External id": 292322,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367610392.178, "dur": 30.949, + "args": { + "External id": 292323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367610396.282, "dur": 26.464, + "args": { + "External id": 292324,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610403.584, "dur": 1.206, + "args": { + "External id": 292325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333367610429.382, "dur": 28.674, + "args": { + "External id": 292326,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070554, "tid": 2070554, + "ts": 5333367610432.025, "dur": 25.823, + "args": { + "External id": 292327,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610436.569, "dur": 3.486, + "args": { + "External id": 292328,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610441.046, "dur": 16.360, + "args": { + "External id": 292329,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2070554, + "ts": 5333367610475.160, "dur": 5.506, + "args": { + "External id": 292330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2070554, + "ts": 5333367610477.534, "dur": 2.899, + "args": { + "External id": 292331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070554, "tid": 2070554, + "ts": 5333367610481.739, "dur": 1.411, + "args": { + "External id": 292332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070554, "tid": 2070554, + "ts": 5333367610482.466, "dur": 0.606, + "args": { + "External id": 292333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367610536.959, "dur": 3.104, + "args": { + "External id": 292334,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32000]", "5", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367610740.258, "dur": 8.927, + "args": { + "External id": 292335,"Sequence number": 1209228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7867 + } + }, + { + "ph": "s", "id": 4, "pid": 2070554, "tid": 2070554, "ts": 5333367610740.258, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367610751.949, "dur": 1.305, + "args": { + "External id": 292336,"Sequence number": 1209229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[131072000, 32000, 1], []], "Input Dims": [[16, 4096, 32000], []], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunction", "pid": 2070554, "tid": 2070554, + "ts": 5333367610787.303, "dur": 8905.444, + "args": { + "External id": 292337,"Sequence number": 1209229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "1.", "8"], "Input type": ["c10::BFloat16", "c10::Half", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [32000, 1], [2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 32000], [32000, 2048], [], [], [], []], "Ev Idx": 7869 + } + }, + { + "ph": "s", "id": 3, "pid": 2070554, "tid": 2070554, "ts": 5333367610787.303, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367610804.365, "dur": 38.442, + "args": { + "External id": 292338,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367610806.257, "dur": 10.966, + "args": { + "External id": 292339,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610810.060, "dur": 6.670, + "args": { + "External id": 292340,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610819.081, "dur": 23.521, + "args": { + "External id": 292341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610821.603, "dur": 20.552, + "args": { + "External id": 292342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367610846.748, "dur": 37.347, + "args": { + "External id": 292343,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367610847.682, "dur": 4.419, + "args": { + "External id": 292344,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610849.503, "dur": 2.357, + "args": { + "External id": 292345,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610867.435, "dur": 16.470, + "args": { + "External id": 292346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610870.222, "dur": 13.289, + "args": { + "External id": 292347,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070554, "tid": 2070554, + "ts": 5333367610891.564, "dur": 20.326, + "args": { + "External id": 292348,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367610893.693, "dur": 3.160, + "args": { + "External id": 292349,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610897.720, "dur": 13.878, + "args": { + "External id": 292350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367610900.664, "dur": 10.575, + "args": { + "External id": 292351,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367610918.100, "dur": 51.214, + "args": { + "External id": 292352,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610965.643, "dur": 1.843, + "args": { + "External id": 292353,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367610976.646, "dur": 94.777, + "args": { + "External id": 292354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367610977.903, "dur": 5.565, + "args": { + "External id": 292355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367610979.710, "dur": 2.949, + "args": { + "External id": 292356,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367610981.172, "dur": 0.888, + "args": { + "External id": 292357,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367610984.622, "dur": 85.964, + "args": { + "External id": 292358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367610985.980, "dur": 83.437, + "args": { + "External id": 292359,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367611075.479, "dur": 6.172, + "args": { + "External id": 292360,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367611079.580, "dur": 1.059, + "args": { + "External id": 292361,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "0"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367611086.447, "dur": 1.931, + "args": { + "External id": 292362,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611095.259, "dur": 5.187, + "args": { + "External id": 292363,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611096.985, "dur": 3.175, + "args": { + "External id": 292364,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367611233.916, "dur": 322.158, + "args": { + "External id": 292365,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611236.753, "dur": 5.644, + "args": { + "External id": 292366,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367611244.186, "dur": 311.366, + "args": { + "External id": 292367,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367611245.990, "dur": 0.371, + "args": { + "External id": 292368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367611247.608, "dur": 26.522, + "args": { + "External id": 292369,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367611275.635, "dur": 5.837, + "args": { + "External id": 292370,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367611280.146, "dur": 0.953, + "args": { + "External id": 292371,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367611282.432, "dur": 28.481, + "args": { + "External id": 292372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611283.675, "dur": 1.428, + "args": { + "External id": 292373,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367611289.035, "dur": 21.618, + "args": { + "External id": 292374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611292.820, "dur": 3.309, + "args": { + "External id": 292375,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367611312.513, "dur": 25.084, + "args": { + "External id": 292376,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611339.347, "dur": 18.966, + "args": { + "External id": 292377,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367611364.436, "dur": 27.563, + "args": { + "External id": 292378,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611393.450, "dur": 41.390, + "args": { + "External id": 292379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367611436.858, "dur": 40.783, + "args": { + "External id": 292380,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611441.580, "dur": 1.365, + "args": { + "External id": 292381,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367611446.833, "dur": 1.432, + "args": { + "External id": 292382,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611479.520, "dur": 41.527, + "args": { + "External id": 292383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611522.480, "dur": 31.845, + "args": { + "External id": 292384,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367611563.196, "dur": 2.307, + "args": { + "External id": 292385,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367611572.996, "dur": 1.677, + "args": { + "External id": 292386,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611582.882, "dur": 4.396, + "args": { + "External id": 292387,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611584.420, "dur": 2.560, + "args": { + "External id": 292388,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367611712.607, "dur": 189.367, + "args": { + "External id": 292389,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611714.682, "dur": 3.909, + "args": { + "External id": 292390,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367611720.530, "dur": 181.012, + "args": { + "External id": 292391,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367611721.764, "dur": 0.406, + "args": { + "External id": 292392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367611722.868, "dur": 24.060, + "args": { + "External id": 292393,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367611748.252, "dur": 2.903, + "args": { + "External id": 292394,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367611749.786, "dur": 1.113, + "args": { + "External id": 292395,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367611754.448, "dur": 22.043, + "args": { + "External id": 292396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367611755.326, "dur": 1.998, + "args": { + "External id": 292397,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367611758.321, "dur": 17.817, + "args": { + "External id": 292398,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611762.101, "dur": 1.761, + "args": { + "External id": 292399,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367611777.667, "dur": 21.273, + "args": { + "External id": 292400,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611800.154, "dur": 12.884, + "args": { + "External id": 292401,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367611815.210, "dur": 14.574, + "args": { + "External id": 292402,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611830.866, "dur": 12.635, + "args": { + "External id": 292403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367611844.935, "dur": 21.119, + "args": { + "External id": 292404,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611848.968, "dur": 1.181, + "args": { + "External id": 292405,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367611852.055, "dur": 0.884, + "args": { + "External id": 292406,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611867.196, "dur": 12.268, + "args": { + "External id": 292407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367611888.598, "dur": 12.041, + "args": { + "External id": 292408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367611908.162, "dur": 1.720, + "args": { + "External id": 292409,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367611921.892, "dur": 30.139, + "args": { + "External id": 292410,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367611924.871, "dur": 6.193, + "args": { + "External id": 292411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367611927.322, "dur": 3.338, + "args": { + "External id": 292412,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367611933.270, "dur": 18.030, + "args": { + "External id": 292413,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367611958.424, "dur": 4.559, + "args": { + "External id": 292414,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367611961.205, "dur": 0.883, + "args": { + "External id": 292415,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367612045.362, "dur": 77.816, + "args": { + "External id": 292416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367612128.651, "dur": 4.966, + "args": { + "External id": 292417,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612131.214, "dur": 1.217, + "args": { + "External id": 292418,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612135.162, "dur": 46.132, + "args": { + "External id": 292419,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367612189.331, "dur": 9.585, + "args": { + "External id": 292420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367612191.519, "dur": 6.439, + "args": { + "External id": 292421,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612196.246, "dur": 1.517, + "args": { + "External id": 292422,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367612202.813, "dur": 54.930, + "args": { + "External id": 292423,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367612204.169, "dur": 52.890, + "args": { + "External id": 292424,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612262.597, "dur": 18.362, + "args": { + "External id": 292425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367612287.783, "dur": 4.881, + "args": { + "External id": 292426,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612290.157, "dur": 1.132, + "args": { + "External id": 292427,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367612297.243, "dur": 54.066, + "args": { + "External id": 292428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367612300.972, "dur": 4.522, + "args": { + "External id": 292429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367612302.179, "dur": 2.666, + "args": { + "External id": 292430,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612304.045, "dur": 0.689, + "args": { + "External id": 292431,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367612306.250, "dur": 44.706, + "args": { + "External id": 292432,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367612306.691, "dur": 43.470, + "args": { + "External id": 292433,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367612355.653, "dur": 5.562, + "args": { + "External id": 292434,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612359.448, "dur": 0.974, + "args": { + "External id": 292435,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "262144000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367612367.692, "dur": 2.320, + "args": { + "External id": 292436,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612377.560, "dur": 6.471, + "args": { + "External id": 292437,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612379.553, "dur": 4.125, + "args": { + "External id": 292438,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367612477.607, "dur": 235.811, + "args": { + "External id": 292439,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612479.550, "dur": 4.527, + "args": { + "External id": 292440,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367612485.293, "dur": 227.551, + "args": { + "External id": 292441,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367612486.587, "dur": 0.480, + "args": { + "External id": 292442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367612488.536, "dur": 22.100, + "args": { + "External id": 292443,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367612512.304, "dur": 5.244, + "args": { + "External id": 292444,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612516.351, "dur": 0.847, + "args": { + "External id": 292445,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367612518.551, "dur": 25.483, + "args": { + "External id": 292446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612519.697, "dur": 1.390, + "args": { + "External id": 292447,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367612524.431, "dur": 19.261, + "args": { + "External id": 292448,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612527.034, "dur": 3.141, + "args": { + "External id": 292449,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367612545.421, "dur": 22.534, + "args": { + "External id": 292450,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612569.128, "dur": 13.797, + "args": { + "External id": 292451,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367612585.047, "dur": 15.656, + "args": { + "External id": 292452,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612601.906, "dur": 13.376, + "args": { + "External id": 292453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367612616.918, "dur": 64.894, + "args": { + "External id": 292454,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612620.930, "dur": 0.992, + "args": { + "External id": 292455,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612662.144, "dur": 0.950, + "args": { + "External id": 292456,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612683.693, "dur": 13.889, + "args": { + "External id": 292457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612698.871, "dur": 12.620, + "args": { + "External id": 292458,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367612720.854, "dur": 3.604, + "args": { + "External id": 292459,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367612734.716, "dur": 1.599, + "args": { + "External id": 292460,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612744.683, "dur": 6.112, + "args": { + "External id": 292461,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612746.517, "dur": 4.036, + "args": { + "External id": 292462,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367612830.523, "dur": 180.730, + "args": { + "External id": 292463,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612832.248, "dur": 2.512, + "args": { + "External id": 292464,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367612836.044, "dur": 174.785, + "args": { + "External id": 292465,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367612837.457, "dur": 0.224, + "args": { + "External id": 292466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367612838.957, "dur": 22.369, + "args": { + "External id": 292467,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367612865.162, "dur": 3.217, + "args": { + "External id": 292468,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612867.460, "dur": 0.668, + "args": { + "External id": 292469,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367612869.168, "dur": 24.258, + "args": { + "External id": 292470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367612870.453, "dur": 1.435, + "args": { + "External id": 292471,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367612873.089, "dur": 20.052, + "args": { + "External id": 292472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612877.941, "dur": 1.929, + "args": { + "External id": 292473,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367612894.356, "dur": 17.567, + "args": { + "External id": 292474,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612913.103, "dur": 14.239, + "args": { + "External id": 292475,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367612929.421, "dur": 12.373, + "args": { + "External id": 292476,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612942.829, "dur": 12.456, + "args": { + "External id": 292477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367612957.157, "dur": 21.248, + "args": { + "External id": 292478,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612961.041, "dur": 1.376, + "args": { + "External id": 292479,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367612964.160, "dur": 0.628, + "args": { + "External id": 292480,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612979.391, "dur": 14.692, + "args": { + "External id": 292481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367612995.109, "dur": 14.852, + "args": { + "External id": 292482,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367613016.208, "dur": 1.278, + "args": { + "External id": 292483,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367613025.839, "dur": 27.443, + "args": { + "External id": 292484,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367613028.075, "dur": 7.022, + "args": { + "External id": 292485,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613030.805, "dur": 3.897, + "args": { + "External id": 292486,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367613036.690, "dur": 15.856, + "args": { + "External id": 292487,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367613062.194, "dur": 4.754, + "args": { + "External id": 292488,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613065.059, "dur": 1.004, + "args": { + "External id": 292489,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367613133.257, "dur": 84.294, + "args": { + "External id": 292490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367613224.901, "dur": 8.727, + "args": { + "External id": 292491,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613228.336, "dur": 3.692, + "args": { + "External id": 292492,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613235.493, "dur": 28.916, + "args": { + "External id": 292493,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367613270.482, "dur": 8.128, + "args": { + "External id": 292494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367613272.389, "dur": 5.511, + "args": { + "External id": 292495,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613276.421, "dur": 1.169, + "args": { + "External id": 292496,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367613282.291, "dur": 47.574, + "args": { + "External id": 292497,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367613283.627, "dur": 45.501, + "args": { + "External id": 292498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613334.497, "dur": 15.853, + "args": { + "External id": 292499,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367613356.763, "dur": 4.197, + "args": { + "External id": 292500,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613358.997, "dur": 0.729, + "args": { + "External id": 292501,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367613367.698, "dur": 50.038, + "args": { + "External id": 292502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367613368.516, "dur": 4.324, + "args": { + "External id": 292503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367613369.569, "dur": 2.444, + "args": { + "External id": 292504,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613370.884, "dur": 0.948, + "args": { + "External id": 292505,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367613373.458, "dur": 43.960, + "args": { + "External id": 292506,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367613374.779, "dur": 42.038, + "args": { + "External id": 292507,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367613422.205, "dur": 13.313, + "args": { + "External id": 292508,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613433.678, "dur": 0.886, + "args": { + "External id": 292509,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "524288000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367613442.222, "dur": 2.062, + "args": { + "External id": 292510,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613451.425, "dur": 6.329, + "args": { + "External id": 292511,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613453.604, "dur": 3.909, + "args": { + "External id": 292512,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367613543.287, "dur": 235.839, + "args": { + "External id": 292513,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613547.739, "dur": 1.778, + "args": { + "External id": 292514,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367613552.604, "dur": 225.960, + "args": { + "External id": 292515,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367613553.958, "dur": 0.219, + "args": { + "External id": 292516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367613555.275, "dur": 21.786, + "args": { + "External id": 292517,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367613578.420, "dur": 5.310, + "args": { + "External id": 292518,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613582.357, "dur": 0.944, + "args": { + "External id": 292519,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367613584.690, "dur": 22.821, + "args": { + "External id": 292520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613585.657, "dur": 1.342, + "args": { + "External id": 292521,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367613588.378, "dur": 18.745, + "args": { + "External id": 292522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613590.713, "dur": 2.749, + "args": { + "External id": 292523,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367613611.037, "dur": 59.712, + "args": { + "External id": 292524,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613673.295, "dur": 16.023, + "args": { + "External id": 292525,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367613692.749, "dur": 14.113, + "args": { + "External id": 292526,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613708.180, "dur": 13.904, + "args": { + "External id": 292527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367613723.853, "dur": 23.822, + "args": { + "External id": 292528,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613728.772, "dur": 1.408, + "args": { + "External id": 292529,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613731.892, "dur": 0.867, + "args": { + "External id": 292530,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613751.229, "dur": 13.239, + "args": { + "External id": 292531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613765.674, "dur": 11.301, + "args": { + "External id": 292532,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367613786.877, "dur": 2.495, + "args": { + "External id": 292533,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367613798.622, "dur": 1.468, + "args": { + "External id": 292534,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613806.136, "dur": 6.455, + "args": { + "External id": 292535,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613809.646, "dur": 2.707, + "args": { + "External id": 292536,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367613885.375, "dur": 161.914, + "args": { + "External id": 292537,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613886.679, "dur": 1.864, + "args": { + "External id": 292538,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367613889.846, "dur": 157.122, + "args": { + "External id": 292539,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367613891.094, "dur": 0.484, + "args": { + "External id": 292540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367613892.396, "dur": 20.264, + "args": { + "External id": 292541,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367613914.433, "dur": 2.746, + "args": { + "External id": 292542,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367613915.983, "dur": 1.016, + "args": { + "External id": 292543,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367613917.672, "dur": 23.831, + "args": { + "External id": 292544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367613918.596, "dur": 1.439, + "args": { + "External id": 292545,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367613921.226, "dur": 19.929, + "args": { + "External id": 292546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613927.425, "dur": 1.601, + "args": { + "External id": 292547,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367613942.898, "dur": 17.852, + "args": { + "External id": 292548,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613961.774, "dur": 11.433, + "args": { + "External id": 292549,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367613975.411, "dur": 12.199, + "args": { + "External id": 292550,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367613988.428, "dur": 11.883, + "args": { + "External id": 292551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367614001.635, "dur": 17.180, + "args": { + "External id": 292552,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614002.817, "dur": 0.970, + "args": { + "External id": 292553,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614005.027, "dur": 0.841, + "args": { + "External id": 292554,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614021.948, "dur": 11.524, + "args": { + "External id": 292555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614034.439, "dur": 11.556, + "args": { + "External id": 292556,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367614051.624, "dur": 1.120, + "args": { + "External id": 292557,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367614060.373, "dur": 25.980, + "args": { + "External id": 292558,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367614063.061, "dur": 6.494, + "args": { + "External id": 292559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614065.305, "dur": 3.798, + "args": { + "External id": 292560,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367614071.197, "dur": 14.471, + "args": { + "External id": 292561,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367614093.058, "dur": 4.293, + "args": { + "External id": 292562,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614095.534, "dur": 0.849, + "args": { + "External id": 292563,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367614164.823, "dur": 84.660, + "args": { + "External id": 292564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367614256.492, "dur": 11.011, + "args": { + "External id": 292565,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614262.402, "dur": 3.464, + "args": { + "External id": 292566,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614269.179, "dur": 25.614, + "args": { + "External id": 292567,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367614300.298, "dur": 6.461, + "args": { + "External id": 292568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367614302.211, "dur": 3.566, + "args": { + "External id": 292569,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614304.473, "dur": 1.028, + "args": { + "External id": 292570,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367614309.960, "dur": 43.524, + "args": { + "External id": 292571,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367614311.120, "dur": 41.664, + "args": { + "External id": 292572,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614360.132, "dur": 15.893, + "args": { + "External id": 292573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367614382.015, "dur": 4.837, + "args": { + "External id": 292574,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614384.875, "dur": 0.904, + "args": { + "External id": 292575,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367614391.382, "dur": 49.599, + "args": { + "External id": 292576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367614392.225, "dur": 6.222, + "args": { + "External id": 292577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367614393.211, "dur": 4.648, + "args": { + "External id": 292578,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614396.871, "dur": 0.872, + "args": { + "External id": 292579,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367614399.145, "dur": 41.423, + "args": { + "External id": 292580,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367614399.830, "dur": 40.062, + "args": { + "External id": 292581,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367614444.778, "dur": 3.900, + "args": { + "External id": 292582,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614446.886, "dur": 0.979, + "args": { + "External id": 292583,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "786432000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367614457.350, "dur": 2.245, + "args": { + "External id": 292584,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614466.818, "dur": 6.022, + "args": { + "External id": 292585,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614468.814, "dur": 3.769, + "args": { + "External id": 292586,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367614560.270, "dur": 244.940, + "args": { + "External id": 292587,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614562.816, "dur": 2.271, + "args": { + "External id": 292588,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367614566.322, "dur": 238.537, + "args": { + "External id": 292589,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367614570.283, "dur": 0.498, + "args": { + "External id": 292590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367614574.206, "dur": 22.094, + "args": { + "External id": 292591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367614598.113, "dur": 3.348, + "args": { + "External id": 292592,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614600.264, "dur": 0.927, + "args": { + "External id": 292593,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367614602.443, "dur": 61.354, + "args": { + "External id": 292594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614606.141, "dur": 1.193, + "args": { + "External id": 292595,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367614608.655, "dur": 54.422, + "args": { + "External id": 292596,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614611.180, "dur": 2.867, + "args": { + "External id": 292597,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367614665.656, "dur": 25.049, + "args": { + "External id": 292598,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614692.345, "dur": 14.144, + "args": { + "External id": 292599,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367614709.620, "dur": 19.160, + "args": { + "External id": 292600,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614732.622, "dur": 18.949, + "args": { + "External id": 292601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367614753.446, "dur": 22.175, + "args": { + "External id": 292602,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614755.851, "dur": 1.326, + "args": { + "External id": 292603,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614759.190, "dur": 1.067, + "args": { + "External id": 292604,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614776.866, "dur": 13.372, + "args": { + "External id": 292605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614791.582, "dur": 12.029, + "args": { + "External id": 292606,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367614814.898, "dur": 2.276, + "args": { + "External id": 292607,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367614826.537, "dur": 1.250, + "args": { + "External id": 292608,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614833.668, "dur": 4.247, + "args": { + "External id": 292609,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614835.341, "dur": 2.338, + "args": { + "External id": 292610,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367614919.668, "dur": 175.081, + "args": { + "External id": 292611,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614923.598, "dur": 2.068, + "args": { + "External id": 292612,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367614927.043, "dur": 167.302, + "args": { + "External id": 292613,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367614928.129, "dur": 0.205, + "args": { + "External id": 292614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367614929.426, "dur": 18.275, + "args": { + "External id": 292615,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367614949.118, "dur": 9.818, + "args": { + "External id": 292616,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367614957.701, "dur": 0.893, + "args": { + "External id": 292617,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367614959.650, "dur": 22.361, + "args": { + "External id": 292618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367614963.023, "dur": 1.462, + "args": { + "External id": 292619,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367614965.770, "dur": 15.946, + "args": { + "External id": 292620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367614967.555, "dur": 1.808, + "args": { + "External id": 292621,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367614985.674, "dur": 15.941, + "args": { + "External id": 292622,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615002.634, "dur": 15.324, + "args": { + "External id": 292623,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367615020.527, "dur": 12.147, + "args": { + "External id": 292624,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615033.705, "dur": 12.822, + "args": { + "External id": 292625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367615047.955, "dur": 20.202, + "args": { + "External id": 292626,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615049.415, "dur": 1.210, + "args": { + "External id": 292627,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615054.126, "dur": 1.082, + "args": { + "External id": 292628,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615069.310, "dur": 11.880, + "args": { + "External id": 292629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615081.999, "dur": 11.324, + "args": { + "External id": 292630,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367615099.353, "dur": 1.383, + "args": { + "External id": 292631,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367615108.321, "dur": 25.288, + "args": { + "External id": 292632,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367615110.486, "dur": 6.157, + "args": { + "External id": 292633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615112.661, "dur": 3.616, + "args": { + "External id": 292634,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367615117.879, "dur": 14.993, + "args": { + "External id": 292635,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367615140.594, "dur": 4.624, + "args": { + "External id": 292636,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615143.269, "dur": 0.836, + "args": { + "External id": 292637,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367615233.047, "dur": 73.508, + "args": { + "External id": 292638,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367615313.038, "dur": 8.555, + "args": { + "External id": 292639,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615318.991, "dur": 1.182, + "args": { + "External id": 292640,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615323.269, "dur": 28.293, + "args": { + "External id": 292641,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367615357.402, "dur": 7.085, + "args": { + "External id": 292642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367615359.497, "dur": 4.309, + "args": { + "External id": 292643,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615362.435, "dur": 1.176, + "args": { + "External id": 292644,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367615367.723, "dur": 46.876, + "args": { + "External id": 292645,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367615371.054, "dur": 42.816, + "args": { + "External id": 292646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615419.281, "dur": 16.362, + "args": { + "External id": 292647,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367615441.659, "dur": 4.199, + "args": { + "External id": 292648,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615443.767, "dur": 0.827, + "args": { + "External id": 292649,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367615450.308, "dur": 54.361, + "args": { + "External id": 292650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367615451.611, "dur": 8.950, + "args": { + "External id": 292651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367615452.787, "dur": 7.005, + "args": { + "External id": 292652,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615458.878, "dur": 0.814, + "args": { + "External id": 292653,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367615461.216, "dur": 42.961, + "args": { + "External id": 292654,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367615462.109, "dur": 41.086, + "args": { + "External id": 292655,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367615508.737, "dur": 4.056, + "args": { + "External id": 292656,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615510.905, "dur": 1.083, + "args": { + "External id": 292657,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1048576000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367615518.531, "dur": 1.696, + "args": { + "External id": 292658,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615529.412, "dur": 6.213, + "args": { + "External id": 292659,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615531.585, "dur": 3.756, + "args": { + "External id": 292660,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367615615.791, "dur": 232.360, + "args": { + "External id": 292661,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615617.632, "dur": 1.529, + "args": { + "External id": 292662,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367615621.231, "dur": 226.484, + "args": { + "External id": 292663,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367615657.704, "dur": 0.590, + "args": { + "External id": 292664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367615662.434, "dur": 28.091, + "args": { + "External id": 292665,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367615692.237, "dur": 3.988, + "args": { + "External id": 292666,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615695.160, "dur": 0.780, + "args": { + "External id": 292667,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367615697.019, "dur": 26.617, + "args": { + "External id": 292668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615698.422, "dur": 1.808, + "args": { + "External id": 292669,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367615701.269, "dur": 21.958, + "args": { + "External id": 292670,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615707.125, "dur": 2.540, + "args": { + "External id": 292671,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367615725.041, "dur": 20.871, + "args": { + "External id": 292672,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615747.259, "dur": 14.910, + "args": { + "External id": 292673,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367615764.987, "dur": 14.146, + "args": { + "External id": 292674,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615780.449, "dur": 13.617, + "args": { + "External id": 292675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367615797.904, "dur": 21.056, + "args": { + "External id": 292676,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615800.255, "dur": 1.733, + "args": { + "External id": 292677,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615803.989, "dur": 1.132, + "args": { + "External id": 292678,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615820.306, "dur": 13.144, + "args": { + "External id": 292679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615834.617, "dur": 11.962, + "args": { + "External id": 292680,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367615855.617, "dur": 2.341, + "args": { + "External id": 292681,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367615869.376, "dur": 1.143, + "args": { + "External id": 292682,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615876.253, "dur": 4.305, + "args": { + "External id": 292683,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615877.930, "dur": 2.275, + "args": { + "External id": 292684,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367615952.920, "dur": 166.658, + "args": { + "External id": 292685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615954.745, "dur": 1.942, + "args": { + "External id": 292686,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367615960.256, "dur": 158.963, + "args": { + "External id": 292687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367615963.078, "dur": 0.313, + "args": { + "External id": 292688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367615963.927, "dur": 18.232, + "args": { + "External id": 292689,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367615983.614, "dur": 5.526, + "args": { + "External id": 292690,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367615985.603, "dur": 3.130, + "args": { + "External id": 292691,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367615989.756, "dur": 18.073, + "args": { + "External id": 292692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367615991.328, "dur": 1.221, + "args": { + "External id": 292693,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367615993.224, "dur": 14.347, + "args": { + "External id": 292694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367615994.931, "dur": 1.283, + "args": { + "External id": 292695,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367616008.801, "dur": 17.680, + "args": { + "External id": 292696,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616027.371, "dur": 11.880, + "args": { + "External id": 292697,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367616043.906, "dur": 12.639, + "args": { + "External id": 292698,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616057.292, "dur": 11.838, + "args": { + "External id": 292699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367616070.359, "dur": 19.705, + "args": { + "External id": 292700,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616071.915, "dur": 1.278, + "args": { + "External id": 292701,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616074.509, "dur": 2.944, + "args": { + "External id": 292702,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616091.080, "dur": 12.271, + "args": { + "External id": 292703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616106.605, "dur": 11.719, + "args": { + "External id": 292704,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367616123.906, "dur": 1.200, + "args": { + "External id": 292705,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367616132.891, "dur": 24.230, + "args": { + "External id": 292706,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367616135.060, "dur": 6.598, + "args": { + "External id": 292707,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616137.596, "dur": 3.475, + "args": { + "External id": 292708,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367616143.024, "dur": 13.366, + "args": { + "External id": 292709,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367616163.315, "dur": 20.523, + "args": { + "External id": 292710,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616180.892, "dur": 1.064, + "args": { + "External id": 292711,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367616258.540, "dur": 67.039, + "args": { + "External id": 292712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367616331.729, "dur": 4.973, + "args": { + "External id": 292713,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616334.573, "dur": 0.987, + "args": { + "External id": 292714,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616338.240, "dur": 26.585, + "args": { + "External id": 292715,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367616369.704, "dur": 8.459, + "args": { + "External id": 292716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367616373.788, "dur": 3.671, + "args": { + "External id": 292717,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616375.819, "dur": 1.447, + "args": { + "External id": 292718,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367616381.224, "dur": 45.793, + "args": { + "External id": 292719,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367616382.363, "dur": 43.843, + "args": { + "External id": 292720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616431.273, "dur": 17.887, + "args": { + "External id": 292721,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367616455.101, "dur": 6.826, + "args": { + "External id": 292722,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616459.849, "dur": 0.775, + "args": { + "External id": 292723,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367616465.751, "dur": 74.498, + "args": { + "External id": 292724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367616466.760, "dur": 4.009, + "args": { + "External id": 292725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367616467.434, "dur": 2.736, + "args": { + "External id": 292726,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616469.372, "dur": 0.661, + "args": { + "External id": 292727,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367616471.443, "dur": 68.446, + "args": { + "External id": 292728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367616472.409, "dur": 66.855, + "args": { + "External id": 292729,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367616546.398, "dur": 3.580, + "args": { + "External id": 292730,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616548.402, "dur": 0.664, + "args": { + "External id": 292731,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1310720000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367616555.582, "dur": 2.086, + "args": { + "External id": 292732,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367616564.446, "dur": 7.505, + "args": { + "External id": 292733,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367616568.302, "dur": 3.331, + "args": { + "External id": 292734,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367616689.386, "dur": 199.858, + "args": { + "External id": 292735,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367616694.205, "dur": 3.381, + "args": { + "External id": 292736,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367616699.471, "dur": 189.398, + "args": { + "External id": 292737,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367616701.269, "dur": 0.467, + "args": { + "External id": 292738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367616702.769, "dur": 24.885, + "args": { + "External id": 292739,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367616729.510, "dur": 5.651, + "args": { + "External id": 292740,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616733.930, "dur": 0.756, + "args": { + "External id": 292741,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367616738.216, "dur": 22.660, + "args": { + "External id": 292742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367616739.527, "dur": 1.266, + "args": { + "External id": 292743,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367616742.320, "dur": 18.138, + "args": { + "External id": 292744,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616744.906, "dur": 2.990, + "args": { + "External id": 292745,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367616762.627, "dur": 22.738, + "args": { + "External id": 292746,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616786.707, "dur": 16.345, + "args": { + "External id": 292747,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367616805.890, "dur": 14.997, + "args": { + "External id": 292748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616822.093, "dur": 13.598, + "args": { + "External id": 292749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367616837.443, "dur": 22.659, + "args": { + "External id": 292750,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616841.496, "dur": 0.929, + "args": { + "External id": 292751,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367616844.598, "dur": 0.825, + "args": { + "External id": 292752,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616861.407, "dur": 12.980, + "args": { + "External id": 292753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367616875.695, "dur": 12.138, + "args": { + "External id": 292754,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367616895.599, "dur": 1.949, + "args": { + "External id": 292755,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367616908.134, "dur": 1.023, + "args": { + "External id": 292756,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367616914.926, "dur": 4.465, + "args": { + "External id": 292757,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367616916.983, "dur": 2.085, + "args": { + "External id": 292758,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367616989.512, "dur": 168.933, + "args": { + "External id": 292759,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367616991.363, "dur": 1.659, + "args": { + "External id": 292760,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367616994.088, "dur": 164.095, + "args": { + "External id": 292761,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367616997.348, "dur": 0.386, + "args": { + "External id": 292762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367616998.728, "dur": 19.888, + "args": { + "External id": 292763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367617019.959, "dur": 2.904, + "args": { + "External id": 292764,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617021.841, "dur": 0.754, + "args": { + "External id": 292765,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367617023.484, "dur": 21.407, + "args": { + "External id": 292766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367617024.240, "dur": 1.335, + "args": { + "External id": 292767,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367617026.409, "dur": 18.191, + "args": { + "External id": 292768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617030.534, "dur": 1.381, + "args": { + "External id": 292769,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367617045.853, "dur": 21.768, + "args": { + "External id": 292770,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617068.787, "dur": 12.340, + "args": { + "External id": 292771,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367617085.137, "dur": 12.340, + "args": { + "External id": 292772,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617098.484, "dur": 11.756, + "args": { + "External id": 292773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367617111.636, "dur": 18.732, + "args": { + "External id": 292774,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617112.717, "dur": 1.063, + "args": { + "External id": 292775,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617115.935, "dur": 1.042, + "args": { + "External id": 292776,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617131.452, "dur": 11.499, + "args": { + "External id": 292777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617146.073, "dur": 11.115, + "args": { + "External id": 292778,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367617162.576, "dur": 1.184, + "args": { + "External id": 292779,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367617185.477, "dur": 31.404, + "args": { + "External id": 292780,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367617187.774, "dur": 7.994, + "args": { + "External id": 292781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617190.429, "dur": 4.752, + "args": { + "External id": 292782,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367617197.081, "dur": 19.083, + "args": { + "External id": 292783,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367617223.779, "dur": 5.123, + "args": { + "External id": 292784,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617226.583, "dur": 1.130, + "args": { + "External id": 292785,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367617295.938, "dur": 66.021, + "args": { + "External id": 292786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367617366.909, "dur": 4.606, + "args": { + "External id": 292787,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617369.323, "dur": 0.983, + "args": { + "External id": 292788,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617373.097, "dur": 25.714, + "args": { + "External id": 292789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367617403.095, "dur": 8.494, + "args": { + "External id": 292790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367617407.177, "dur": 3.682, + "args": { + "External id": 292791,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617409.323, "dur": 1.338, + "args": { + "External id": 292792,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367617414.489, "dur": 42.248, + "args": { + "External id": 292793,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367617415.790, "dur": 40.344, + "args": { + "External id": 292794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617460.654, "dur": 17.524, + "args": { + "External id": 292795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367617484.023, "dur": 5.362, + "args": { + "External id": 292796,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617487.587, "dur": 0.629, + "args": { + "External id": 292797,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367617493.538, "dur": 50.378, + "args": { + "External id": 292798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367617494.646, "dur": 5.061, + "args": { + "External id": 292799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367617495.771, "dur": 3.355, + "args": { + "External id": 292800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617498.115, "dur": 0.890, + "args": { + "External id": 292801,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367617500.517, "dur": 43.023, + "args": { + "External id": 292802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367617501.309, "dur": 41.108, + "args": { + "External id": 292803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367617547.892, "dur": 5.639, + "args": { + "External id": 292804,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617551.703, "dur": 0.881, + "args": { + "External id": 292805,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1572864000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367617558.753, "dur": 1.640, + "args": { + "External id": 292806,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367617566.977, "dur": 5.351, + "args": { + "External id": 292807,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367617568.705, "dur": 3.364, + "args": { + "External id": 292808,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367617699.367, "dur": 200.110, + "args": { + "External id": 292809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367617701.672, "dur": 3.272, + "args": { + "External id": 292810,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367617708.923, "dur": 190.119, + "args": { + "External id": 292811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367617710.295, "dur": 0.455, + "args": { + "External id": 292812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367617712.265, "dur": 23.553, + "args": { + "External id": 292813,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367617737.668, "dur": 5.948, + "args": { + "External id": 292814,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617739.781, "dur": 3.258, + "args": { + "External id": 292815,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367617744.655, "dur": 26.921, + "args": { + "External id": 292816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367617745.690, "dur": 1.453, + "args": { + "External id": 292817,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367617750.647, "dur": 20.565, + "args": { + "External id": 292818,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617753.784, "dur": 3.116, + "args": { + "External id": 292819,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367617772.903, "dur": 19.858, + "args": { + "External id": 292820,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617793.912, "dur": 15.969, + "args": { + "External id": 292821,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367617812.980, "dur": 14.173, + "args": { + "External id": 292822,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617828.575, "dur": 14.099, + "args": { + "External id": 292823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367617844.272, "dur": 25.871, + "args": { + "External id": 292824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617846.227, "dur": 1.148, + "args": { + "External id": 292825,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367617852.317, "dur": 3.452, + "args": { + "External id": 292826,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617871.453, "dur": 13.520, + "args": { + "External id": 292827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367617886.261, "dur": 11.479, + "args": { + "External id": 292828,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367617906.090, "dur": 2.146, + "args": { + "External id": 292829,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367617917.855, "dur": 1.162, + "args": { + "External id": 292830,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367617924.816, "dur": 5.799, + "args": { + "External id": 292831,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367617927.861, "dur": 2.494, + "args": { + "External id": 292832,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367618005.218, "dur": 192.434, + "args": { + "External id": 292833,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367618007.035, "dur": 2.847, + "args": { + "External id": 292834,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367618010.848, "dur": 186.403, + "args": { + "External id": 292835,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367618011.799, "dur": 0.249, + "args": { + "External id": 292836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367618012.857, "dur": 17.445, + "args": { + "External id": 292837,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367618031.319, "dur": 6.134, + "args": { + "External id": 292838,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618035.376, "dur": 1.759, + "args": { + "External id": 292839,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367618037.980, "dur": 20.913, + "args": { + "External id": 292840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367618040.748, "dur": 1.595, + "args": { + "External id": 292841,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367618043.665, "dur": 14.984, + "args": { + "External id": 292842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618045.473, "dur": 1.843, + "args": { + "External id": 292843,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367618059.853, "dur": 22.412, + "args": { + "External id": 292844,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618083.269, "dur": 17.482, + "args": { + "External id": 292845,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367618102.809, "dur": 13.868, + "args": { + "External id": 292846,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618117.843, "dur": 13.298, + "args": { + "External id": 292847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367618132.619, "dur": 19.910, + "args": { + "External id": 292848,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618133.694, "dur": 1.066, + "args": { + "External id": 292849,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618138.332, "dur": 1.213, + "args": { + "External id": 292850,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618153.635, "dur": 11.843, + "args": { + "External id": 292851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618181.392, "dur": 14.327, + "args": { + "External id": 292852,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367618203.616, "dur": 1.872, + "args": { + "External id": 292853,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367618213.650, "dur": 33.062, + "args": { + "External id": 292854,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367618216.028, "dur": 11.494, + "args": { + "External id": 292855,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618223.413, "dur": 3.725, + "args": { + "External id": 292856,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367618229.076, "dur": 16.901, + "args": { + "External id": 292857,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367618253.323, "dur": 5.051, + "args": { + "External id": 292858,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618256.283, "dur": 0.911, + "args": { + "External id": 292859,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367618330.025, "dur": 66.291, + "args": { + "External id": 292860,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367618401.617, "dur": 6.705, + "args": { + "External id": 292861,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618406.334, "dur": 0.896, + "args": { + "External id": 292862,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618410.296, "dur": 26.332, + "args": { + "External id": 292863,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367618441.478, "dur": 44.647, + "args": { + "External id": 292864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367618481.861, "dur": 3.164, + "args": { + "External id": 292865,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618483.670, "dur": 1.182, + "args": { + "External id": 292866,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367618489.378, "dur": 47.419, + "args": { + "External id": 292867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367618492.805, "dur": 43.383, + "args": { + "External id": 292868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618541.133, "dur": 16.822, + "args": { + "External id": 292869,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367618563.716, "dur": 3.533, + "args": { + "External id": 292870,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618565.440, "dur": 0.665, + "args": { + "External id": 292871,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070554, "tid": 2070554, + "ts": 5333367618571.620, "dur": 88.632, + "args": { + "External id": 292872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367618572.306, "dur": 8.759, + "args": { + "External id": 292873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367618573.096, "dur": 7.089, + "args": { + "External id": 292874,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618579.525, "dur": 0.551, + "args": { + "External id": 292875,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367618581.808, "dur": 77.856, + "args": { + "External id": 292876,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367618582.503, "dur": 75.868, + "args": { + "External id": 292877,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367618666.841, "dur": 4.182, + "args": { + "External id": 292878,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618669.082, "dur": 0.851, + "args": { + "External id": 292879,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1835008000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367618679.714, "dur": 2.005, + "args": { + "External id": 292880,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367618689.438, "dur": 5.891, + "args": { + "External id": 292881,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367618691.264, "dur": 3.799, + "args": { + "External id": 292882,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367618785.803, "dur": 196.841, + "args": { + "External id": 292883,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367618787.984, "dur": 2.019, + "args": { + "External id": 292884,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367618794.161, "dur": 188.080, + "args": { + "External id": 292885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367618795.439, "dur": 0.219, + "args": { + "External id": 292886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367618799.581, "dur": 26.013, + "args": { + "External id": 292887,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367618827.174, "dur": 3.299, + "args": { + "External id": 292888,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618829.376, "dur": 0.660, + "args": { + "External id": 292889,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367618831.564, "dur": 23.168, + "args": { + "External id": 292890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367618832.395, "dur": 1.422, + "args": { + "External id": 292891,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367618834.852, "dur": 19.439, + "args": { + "External id": 292892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618839.091, "dur": 2.819, + "args": { + "External id": 292893,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367618858.085, "dur": 23.561, + "args": { + "External id": 292894,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618883.166, "dur": 13.360, + "args": { + "External id": 292895,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367618899.063, "dur": 14.594, + "args": { + "External id": 292896,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618914.912, "dur": 14.259, + "args": { + "External id": 292897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367618930.933, "dur": 19.425, + "args": { + "External id": 292898,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618932.788, "dur": 0.994, + "args": { + "External id": 292899,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367618935.648, "dur": 0.485, + "args": { + "External id": 292900,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618953.910, "dur": 13.254, + "args": { + "External id": 292901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367618968.393, "dur": 12.588, + "args": { + "External id": 292902,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367618988.563, "dur": 1.474, + "args": { + "External id": 292903,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367618998.673, "dur": 0.943, + "args": { + "External id": 292904,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367619005.004, "dur": 6.313, + "args": { + "External id": 292905,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367619008.475, "dur": 2.496, + "args": { + "External id": 292906,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367619081.050, "dur": 180.363, + "args": { + "External id": 292907,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367619083.227, "dur": 1.852, + "args": { + "External id": 292908,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070554, "tid": 2070554, + "ts": 5333367619088.300, "dur": 172.737, + "args": { + "External id": 292909,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070554, "tid": 2070554, + "ts": 5333367619088.951, "dur": 0.146, + "args": { + "External id": 292910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070554, "tid": 2070554, + "ts": 5333367619090.275, "dur": 17.516, + "args": { + "External id": 292911,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070554, "tid": 2070554, + "ts": 5333367619108.977, "dur": 4.717, + "args": { + "External id": 292912,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367619110.847, "dur": 2.620, + "args": { + "External id": 292913,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367619114.530, "dur": 21.075, + "args": { + "External id": 292914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333367619117.253, "dur": 1.130, + "args": { + "External id": 292915,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333367619119.484, "dur": 15.880, + "args": { + "External id": 292916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619121.077, "dur": 1.445, + "args": { + "External id": 292917,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333367619136.621, "dur": 16.117, + "args": { + "External id": 292918,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619153.762, "dur": 12.108, + "args": { + "External id": 292919,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070554, "tid": 2070554, + "ts": 5333367619182.973, "dur": 15.913, + "args": { + "External id": 292920,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619199.812, "dur": 12.645, + "args": { + "External id": 292921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367619214.317, "dur": 21.133, + "args": { + "External id": 292922,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619216.122, "dur": 1.291, + "args": { + "External id": 292923,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367619220.768, "dur": 0.645, + "args": { + "External id": 292924,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619236.549, "dur": 11.126, + "args": { + "External id": 292925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619248.696, "dur": 11.316, + "args": { + "External id": 292926,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333367619267.610, "dur": 1.629, + "args": { + "External id": 292927,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367619276.880, "dur": 26.991, + "args": { + "External id": 292928,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367619279.494, "dur": 8.165, + "args": { + "External id": 292929,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367619281.706, "dur": 5.480, + "args": { + "External id": 292930,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070554, "tid": 2070554, + "ts": 5333367619289.336, "dur": 13.773, + "args": { + "External id": 292931,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367619310.078, "dur": 4.482, + "args": { + "External id": 292932,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367619312.718, "dur": 0.630, + "args": { + "External id": 292933,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367619383.507, "dur": 63.331, + "args": { + "External id": 292934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070554, "tid": 2070554, + "ts": 5333367619454.372, "dur": 4.455, + "args": { + "External id": 292935,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367619456.813, "dur": 0.907, + "args": { + "External id": 292936,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619460.480, "dur": 25.544, + "args": { + "External id": 292937,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070554, "tid": 2070554, + "ts": 5333367619490.893, "dur": 5.788, + "args": { + "External id": 292938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070554, "tid": 2070554, + "ts": 5333367619492.445, "dur": 3.284, + "args": { + "External id": 292939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367619494.431, "dur": 1.119, + "args": { + "External id": 292940,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070554, "tid": 2070554, + "ts": 5333367619501.674, "dur": 41.928, + "args": { + "External id": 292941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070554, "tid": 2070554, + "ts": 5333367619502.739, "dur": 40.217, + "args": { + "External id": 292942,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070554, "tid": 2070554, + "ts": 5333367619547.657, "dur": 15.250, + "args": { + "External id": 292943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367619568.066, "dur": 26.271, + "args": { + "External id": 292944,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070554, "tid": 2070554, + "ts": 5333367619570.590, "dur": 23.290, + "args": { + "External id": 292945,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367619575.545, "dur": 3.054, + "args": { + "External id": 292946,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070554, "tid": 2070554, + "ts": 5333367619603.389, "dur": 54.520, + "args": { + "External id": 292947,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070554, "tid": 2070554, + "ts": 5333367619710.105, "dur": 27.765, + "args": { + "External id": 292948,"Sequence number": 1209230, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8480 + } + }, + { + "ph": "s", "id": 2, "pid": 2070554, "tid": 2070554, "ts": 5333367619710.105, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2070554, "tid": 2070554, + "ts": 5333367619847.742, "dur": 41.631, + "args": { + "External id": 292949,"Record function id": 0, "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070554, "tid": 2070554, + "ts": 5333367619981.547, "dur": 30.704, + "args": { + "External id": 292950,"Sequence number": 1209231, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8482 + } + }, + { + "ph": "s", "id": 1, "pid": 2070554, "tid": 2070554, "ts": 5333367619981.547, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367620060.560, "dur": 28.597, + "args": { + "External id": 292951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333367620062.223, "dur": 8.651, + "args": { + "External id": 292952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333367620066.161, "dur": 4.233, + "args": { + "External id": 292953,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333367620072.634, "dur": 16.225, + "args": { + "External id": 292954,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070554, "tid": 2070554, + "ts": 5333368983825.440, "dur": 51.688, + "args": { + "External id": 292955,"Sequence number": 1209232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070554, "tid": 2070554, + "ts": 5333368983885.601, "dur": 17.254, + "args": { + "External id": 292956,"Sequence number": 1209233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070554, "tid": 2070554, + "ts": 5333368983910.731, "dur": 23.606, + "args": { + "External id": 292957,"Sequence number": 1209234, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070554, "tid": 2070554, + "ts": 5333368984241.711, "dur": 26.391, + "args": { + "External id": 292958,"Sequence number": 1209235, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070554, "tid": 2070554, + "ts": 5333368984273.817, "dur": 25.707, + "args": { + "External id": 292959,"Sequence number": 1209236, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070554, "tid": 2070554, + "ts": 5333368985845.453, "dur": 2937.440, + "args": { + "External id": 292960,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070554, "tid": 2070554, + "ts": 5333368986419.899, "dur": 842.115, + "args": { + "External id": 292961,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070554, "tid": 2070554, + "ts": 5333368986443.847, "dur": 68.943, + "args": { + "External id": 292962,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333368986447.187, "dur": 11.811, + "args": { + "External id": 292963,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070554, "tid": 2070554, + "ts": 5333368986462.041, "dur": 50.427, + "args": { + "External id": 292964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[36500]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070554, "tid": 2070554, + "ts": 5333368986463.925, "dur": 47.949, + "args": { + "External id": 292965,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[36500], []], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988819.583, "dur": 3.528, + "args": { + "External id": 292966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988825.139, "dur": 0.458, + "args": { + "External id": 292967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988826.825, "dur": 0.285, + "args": { + "External id": 292968,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988828.237, "dur": 0.458, + "args": { + "External id": 292969,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988829.854, "dur": 0.482, + "args": { + "External id": 292970,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988831.738, "dur": 0.243, + "args": { + "External id": 292971,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988833.231, "dur": 0.212, + "args": { + "External id": 292972,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988836.721, "dur": 0.209, + "args": { + "External id": 292973,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988837.964, "dur": 0.245, + "args": { + "External id": 292974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988839.169, "dur": 0.261, + "args": { + "External id": 292975,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988840.270, "dur": 0.224, + "args": { + "External id": 292976,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988841.505, "dur": 0.212, + "args": { + "External id": 292977,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988842.497, "dur": 0.388, + "args": { + "External id": 292978,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988843.660, "dur": 0.215, + "args": { + "External id": 292979,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988844.644, "dur": 0.213, + "args": { + "External id": 292980,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988847.651, "dur": 0.251, + "args": { + "External id": 292981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988848.743, "dur": 0.260, + "args": { + "External id": 292982,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988849.923, "dur": 0.236, + "args": { + "External id": 292983,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988850.978, "dur": 0.249, + "args": { + "External id": 292984,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988851.987, "dur": 0.236, + "args": { + "External id": 292985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988853.001, "dur": 0.245, + "args": { + "External id": 292986,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988854.414, "dur": 0.234, + "args": { + "External id": 292987,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988855.481, "dur": 0.463, + "args": { + "External id": 292988,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988858.849, "dur": 0.427, + "args": { + "External id": 292989,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988860.133, "dur": 0.212, + "args": { + "External id": 292990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988861.606, "dur": 0.246, + "args": { + "External id": 292991,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988862.664, "dur": 0.464, + "args": { + "External id": 292992,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988864.185, "dur": 0.213, + "args": { + "External id": 292993,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988865.210, "dur": 0.210, + "args": { + "External id": 292994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988866.193, "dur": 0.383, + "args": { + "External id": 292995,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988867.325, "dur": 0.380, + "args": { + "External id": 292996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988870.710, "dur": 0.227, + "args": { + "External id": 292997,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988871.756, "dur": 0.237, + "args": { + "External id": 292998,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988873.247, "dur": 0.243, + "args": { + "External id": 292999,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988874.425, "dur": 0.210, + "args": { + "External id": 293000,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988875.497, "dur": 0.208, + "args": { + "External id": 293001,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988876.448, "dur": 0.207, + "args": { + "External id": 293002,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988877.404, "dur": 0.208, + "args": { + "External id": 293003,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988878.756, "dur": 0.216, + "args": { + "External id": 293004,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988881.606, "dur": 0.205, + "args": { + "External id": 293005,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988882.754, "dur": 0.233, + "args": { + "External id": 293006,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988883.948, "dur": 0.240, + "args": { + "External id": 293007,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988884.937, "dur": 0.210, + "args": { + "External id": 293008,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988886.014, "dur": 0.216, + "args": { + "External id": 293009,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988886.977, "dur": 0.235, + "args": { + "External id": 293010,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988888.024, "dur": 0.234, + "args": { + "External id": 293011,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988889.248, "dur": 3.584, + "args": { + "External id": 293012,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988895.560, "dur": 0.422, + "args": { + "External id": 293013,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988897.173, "dur": 0.402, + "args": { + "External id": 293014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988898.320, "dur": 0.226, + "args": { + "External id": 293015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988899.329, "dur": 0.237, + "args": { + "External id": 293016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988900.350, "dur": 0.223, + "args": { + "External id": 293017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988901.325, "dur": 0.411, + "args": { + "External id": 293018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988902.745, "dur": 0.231, + "args": { + "External id": 293019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988903.720, "dur": 0.210, + "args": { + "External id": 293020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988906.697, "dur": 0.201, + "args": { + "External id": 293021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988907.689, "dur": 0.220, + "args": { + "External id": 293022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988908.686, "dur": 0.237, + "args": { + "External id": 293023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988909.665, "dur": 0.211, + "args": { + "External id": 293024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988910.711, "dur": 0.205, + "args": { + "External id": 293025,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988911.717, "dur": 0.208, + "args": { + "External id": 293026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988913.008, "dur": 0.239, + "args": { + "External id": 293027,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988914.005, "dur": 0.225, + "args": { + "External id": 293028,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988917.132, "dur": 0.211, + "args": { + "External id": 293029,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988918.341, "dur": 0.217, + "args": { + "External id": 293030,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988919.783, "dur": 0.218, + "args": { + "External id": 293031,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988920.945, "dur": 0.210, + "args": { + "External id": 293032,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988922.005, "dur": 0.493, + "args": { + "External id": 293033,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988923.496, "dur": 0.426, + "args": { + "External id": 293034,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988924.677, "dur": 0.202, + "args": { + "External id": 293035,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988925.687, "dur": 0.205, + "args": { + "External id": 293036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988928.659, "dur": 0.265, + "args": { + "External id": 293037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988929.751, "dur": 0.211, + "args": { + "External id": 293038,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988930.931, "dur": 0.231, + "args": { + "External id": 293039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988931.930, "dur": 0.200, + "args": { + "External id": 293040,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988932.957, "dur": 0.214, + "args": { + "External id": 293041,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988934.060, "dur": 0.476, + "args": { + "External id": 293042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988936.338, "dur": 0.219, + "args": { + "External id": 293043,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988937.365, "dur": 0.197, + "args": { + "External id": 293044,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988940.852, "dur": 0.211, + "args": { + "External id": 293045,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988941.849, "dur": 0.231, + "args": { + "External id": 293046,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988942.844, "dur": 0.402, + "args": { + "External id": 293047,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988944.217, "dur": 0.435, + "args": { + "External id": 293048,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988945.454, "dur": 0.208, + "args": { + "External id": 293049,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988946.422, "dur": 0.209, + "args": { + "External id": 293050,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988952.301, "dur": 0.255, + "args": { + "External id": 293051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988953.448, "dur": 0.201, + "args": { + "External id": 293052,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988956.636, "dur": 0.412, + "args": { + "External id": 293053,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988957.851, "dur": 0.375, + "args": { + "External id": 293054,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988959.041, "dur": 0.421, + "args": { + "External id": 293055,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988960.365, "dur": 0.201, + "args": { + "External id": 293056,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988961.331, "dur": 0.208, + "args": { + "External id": 293057,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988962.467, "dur": 0.202, + "args": { + "External id": 293058,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988964.231, "dur": 0.205, + "args": { + "External id": 293059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988965.426, "dur": 0.197, + "args": { + "External id": 293060,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988968.135, "dur": 0.485, + "args": { + "External id": 293061,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988969.434, "dur": 0.399, + "args": { + "External id": 293062,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988970.633, "dur": 0.551, + "args": { + "External id": 293063,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988972.119, "dur": 0.247, + "args": { + "External id": 293064,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988973.209, "dur": 0.204, + "args": { + "External id": 293065,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988974.388, "dur": 0.223, + "args": { + "External id": 293066,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988975.362, "dur": 0.224, + "args": { + "External id": 293067,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988976.540, "dur": 0.201, + "args": { + "External id": 293068,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988979.466, "dur": 0.395, + "args": { + "External id": 293069,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988980.782, "dur": 0.204, + "args": { + "External id": 293070,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988981.823, "dur": 0.506, + "args": { + "External id": 293071,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988983.371, "dur": 0.193, + "args": { + "External id": 293072,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988984.370, "dur": 0.199, + "args": { + "External id": 293073,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988985.333, "dur": 0.208, + "args": { + "External id": 293074,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988986.755, "dur": 0.205, + "args": { + "External id": 293075,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988989.264, "dur": 0.233, + "args": { + "External id": 293076,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988992.479, "dur": 0.234, + "args": { + "External id": 293077,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988993.533, "dur": 0.234, + "args": { + "External id": 293078,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988994.521, "dur": 0.229, + "args": { + "External id": 293079,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988995.536, "dur": 0.224, + "args": { + "External id": 293080,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988996.648, "dur": 0.210, + "args": { + "External id": 293081,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988997.702, "dur": 0.206, + "args": { + "External id": 293082,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988998.661, "dur": 0.202, + "args": { + "External id": 293083,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368988999.654, "dur": 0.203, + "args": { + "External id": 293084,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989002.773, "dur": 0.204, + "args": { + "External id": 293085,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989003.786, "dur": 0.204, + "args": { + "External id": 293086,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989004.791, "dur": 0.202, + "args": { + "External id": 293087,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989005.801, "dur": 0.226, + "args": { + "External id": 293088,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989006.838, "dur": 0.202, + "args": { + "External id": 293089,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989007.792, "dur": 0.221, + "args": { + "External id": 293090,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989008.775, "dur": 0.198, + "args": { + "External id": 293091,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989009.768, "dur": 0.203, + "args": { + "External id": 293092,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989012.761, "dur": 0.203, + "args": { + "External id": 293093,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989013.721, "dur": 0.208, + "args": { + "External id": 293094,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989015.046, "dur": 0.199, + "args": { + "External id": 293095,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989016.057, "dur": 0.207, + "args": { + "External id": 293096,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989017.045, "dur": 0.217, + "args": { + "External id": 293097,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989018.052, "dur": 0.225, + "args": { + "External id": 293098,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989019.052, "dur": 0.206, + "args": { + "External id": 293099,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989020.018, "dur": 0.194, + "args": { + "External id": 293100,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989022.865, "dur": 0.216, + "args": { + "External id": 293101,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989023.843, "dur": 0.203, + "args": { + "External id": 293102,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989024.804, "dur": 0.209, + "args": { + "External id": 293103,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989025.801, "dur": 0.201, + "args": { + "External id": 293104,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989026.763, "dur": 0.208, + "args": { + "External id": 293105,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989027.739, "dur": 0.216, + "args": { + "External id": 293106,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989028.711, "dur": 0.207, + "args": { + "External id": 293107,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989029.679, "dur": 0.395, + "args": { + "External id": 293108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989033.112, "dur": 0.210, + "args": { + "External id": 293109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989034.237, "dur": 0.394, + "args": { + "External id": 293110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989035.397, "dur": 0.414, + "args": { + "External id": 293111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989036.575, "dur": 0.521, + "args": { + "External id": 293112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989037.866, "dur": 0.391, + "args": { + "External id": 293113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989039.047, "dur": 0.223, + "args": { + "External id": 293114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989040.597, "dur": 0.366, + "args": { + "External id": 293115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989041.818, "dur": 0.418, + "args": { + "External id": 293116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989045.205, "dur": 0.234, + "args": { + "External id": 293117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989046.198, "dur": 0.217, + "args": { + "External id": 293118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989047.188, "dur": 0.206, + "args": { + "External id": 293119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989048.214, "dur": 0.431, + "args": { + "External id": 293120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989049.454, "dur": 0.228, + "args": { + "External id": 293121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989050.515, "dur": 0.399, + "args": { + "External id": 293122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989051.717, "dur": 0.205, + "args": { + "External id": 293123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989052.701, "dur": 0.198, + "args": { + "External id": 293124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989055.460, "dur": 0.203, + "args": { + "External id": 293125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989056.452, "dur": 0.197, + "args": { + "External id": 293126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989057.543, "dur": 0.212, + "args": { + "External id": 293127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989058.567, "dur": 0.224, + "args": { + "External id": 293128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989059.645, "dur": 0.213, + "args": { + "External id": 293129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989060.646, "dur": 0.199, + "args": { + "External id": 293130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989061.621, "dur": 0.207, + "args": { + "External id": 293131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989062.723, "dur": 0.220, + "args": { + "External id": 293132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989065.369, "dur": 0.203, + "args": { + "External id": 293133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989066.336, "dur": 0.474, + "args": { + "External id": 293134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989067.592, "dur": 0.419, + "args": { + "External id": 293135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989068.762, "dur": 0.221, + "args": { + "External id": 293136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989069.805, "dur": 0.233, + "args": { + "External id": 293137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989070.809, "dur": 0.402, + "args": { + "External id": 293138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989072.361, "dur": 0.202, + "args": { + "External id": 293139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989073.323, "dur": 0.365, + "args": { + "External id": 293140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989076.388, "dur": 0.207, + "args": { + "External id": 293141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989077.404, "dur": 0.199, + "args": { + "External id": 293142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989078.374, "dur": 0.204, + "args": { + "External id": 293143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989079.334, "dur": 0.197, + "args": { + "External id": 293144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989080.303, "dur": 0.202, + "args": { + "External id": 293145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989081.310, "dur": 0.224, + "args": { + "External id": 293146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989082.287, "dur": 0.232, + "args": { + "External id": 293147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989083.272, "dur": 0.196, + "args": { + "External id": 293148,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989086.035, "dur": 0.206, + "args": { + "External id": 293149,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989087.052, "dur": 0.196, + "args": { + "External id": 293150,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989088.006, "dur": 0.201, + "args": { + "External id": 293151,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989088.997, "dur": 0.197, + "args": { + "External id": 293152,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989089.971, "dur": 0.202, + "args": { + "External id": 293153,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989090.927, "dur": 0.197, + "args": { + "External id": 293154,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989091.914, "dur": 0.202, + "args": { + "External id": 293155,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989092.916, "dur": 0.225, + "args": { + "External id": 293156,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989095.724, "dur": 0.236, + "args": { + "External id": 293157,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989096.953, "dur": 0.238, + "args": { + "External id": 293158,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989098.018, "dur": 0.231, + "args": { + "External id": 293159,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989099.146, "dur": 0.198, + "args": { + "External id": 293160,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989100.094, "dur": 0.201, + "args": { + "External id": 293161,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989101.197, "dur": 0.199, + "args": { + "External id": 293162,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989102.200, "dur": 0.202, + "args": { + "External id": 293163,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989103.153, "dur": 0.200, + "args": { + "External id": 293164,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989106.112, "dur": 0.204, + "args": { + "External id": 293165,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989107.086, "dur": 0.201, + "args": { + "External id": 293166,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989108.414, "dur": 0.231, + "args": { + "External id": 293167,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989109.398, "dur": 0.198, + "args": { + "External id": 293168,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989110.343, "dur": 0.203, + "args": { + "External id": 293169,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989111.336, "dur": 0.233, + "args": { + "External id": 293170,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989112.321, "dur": 0.203, + "args": { + "External id": 293171,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989113.325, "dur": 0.198, + "args": { + "External id": 293172,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989115.980, "dur": 0.204, + "args": { + "External id": 293173,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989117.031, "dur": 0.217, + "args": { + "External id": 293174,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989118.058, "dur": 0.229, + "args": { + "External id": 293175,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989119.055, "dur": 0.198, + "args": { + "External id": 293176,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989120.034, "dur": 0.202, + "args": { + "External id": 293177,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989120.991, "dur": 0.197, + "args": { + "External id": 293178,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989121.948, "dur": 0.200, + "args": { + "External id": 293179,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989122.921, "dur": 0.197, + "args": { + "External id": 293180,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989125.787, "dur": 0.200, + "args": { + "External id": 293181,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989126.745, "dur": 0.217, + "args": { + "External id": 293182,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989127.793, "dur": 0.241, + "args": { + "External id": 293183,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989128.838, "dur": 0.213, + "args": { + "External id": 293184,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989129.805, "dur": 0.203, + "args": { + "External id": 293185,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989130.802, "dur": 0.198, + "args": { + "External id": 293186,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989131.751, "dur": 0.199, + "args": { + "External id": 293187,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989132.700, "dur": 0.198, + "args": { + "External id": 293188,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989137.095, "dur": 0.412, + "args": { + "External id": 293189,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989138.310, "dur": 0.197, + "args": { + "External id": 293190,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989139.303, "dur": 0.215, + "args": { + "External id": 293191,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989140.284, "dur": 0.450, + "args": { + "External id": 293192,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989141.496, "dur": 0.417, + "args": { + "External id": 293193,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989142.698, "dur": 0.500, + "args": { + "External id": 293194,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989144.023, "dur": 0.240, + "args": { + "External id": 293195,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989145.045, "dur": 0.370, + "args": { + "External id": 293196,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989148.153, "dur": 0.536, + "args": { + "External id": 293197,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989149.617, "dur": 0.221, + "args": { + "External id": 293198,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989150.984, "dur": 0.223, + "args": { + "External id": 293199,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989151.974, "dur": 0.216, + "args": { + "External id": 293200,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989153.345, "dur": 0.204, + "args": { + "External id": 293201,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989154.526, "dur": 0.200, + "args": { + "External id": 293202,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989155.474, "dur": 0.226, + "args": { + "External id": 293203,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989156.691, "dur": 0.194, + "args": { + "External id": 293204,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989159.252, "dur": 0.203, + "args": { + "External id": 293205,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989160.231, "dur": 0.195, + "args": { + "External id": 293206,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989161.201, "dur": 0.203, + "args": { + "External id": 293207,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989162.234, "dur": 0.380, + "args": { + "External id": 293208,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989163.449, "dur": 0.226, + "args": { + "External id": 293209,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989164.480, "dur": 12.942, + "args": { + "External id": 293210,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989180.891, "dur": 0.470, + "args": { + "External id": 293211,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989182.418, "dur": 0.263, + "args": { + "External id": 293212,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989185.250, "dur": 0.398, + "args": { + "External id": 293213,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989186.587, "dur": 0.419, + "args": { + "External id": 293214,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989187.960, "dur": 0.222, + "args": { + "External id": 293215,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989189.033, "dur": 0.219, + "args": { + "External id": 293216,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989190.087, "dur": 0.200, + "args": { + "External id": 293217,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989191.167, "dur": 0.364, + "args": { + "External id": 293218,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989192.347, "dur": 0.198, + "args": { + "External id": 293219,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989193.492, "dur": 0.219, + "args": { + "External id": 293220,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989196.563, "dur": 0.200, + "args": { + "External id": 293221,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989197.773, "dur": 0.203, + "args": { + "External id": 293222,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989198.773, "dur": 0.196, + "args": { + "External id": 293223,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989199.942, "dur": 0.204, + "args": { + "External id": 293224,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989200.986, "dur": 0.214, + "args": { + "External id": 293225,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989201.973, "dur": 0.235, + "args": { + "External id": 293226,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989203.066, "dur": 0.198, + "args": { + "External id": 293227,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989204.086, "dur": 0.203, + "args": { + "External id": 293228,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989207.439, "dur": 0.207, + "args": { + "External id": 293229,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989208.499, "dur": 0.214, + "args": { + "External id": 293230,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989209.571, "dur": 0.209, + "args": { + "External id": 293231,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989210.585, "dur": 0.432, + "args": { + "External id": 293232,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989211.794, "dur": 0.211, + "args": { + "External id": 293233,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989212.819, "dur": 0.447, + "args": { + "External id": 293234,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989214.489, "dur": 0.239, + "args": { + "External id": 293235,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989215.502, "dur": 0.209, + "args": { + "External id": 293236,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989218.267, "dur": 0.198, + "args": { + "External id": 293237,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989219.238, "dur": 0.204, + "args": { + "External id": 293238,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989220.261, "dur": 0.199, + "args": { + "External id": 293239,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989221.273, "dur": 0.238, + "args": { + "External id": 293240,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989222.377, "dur": 0.203, + "args": { + "External id": 293241,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989223.344, "dur": 0.205, + "args": { + "External id": 293242,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989224.322, "dur": 0.202, + "args": { + "External id": 293243,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989225.459, "dur": 0.230, + "args": { + "External id": 293244,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989228.164, "dur": 0.198, + "args": { + "External id": 293245,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989229.442, "dur": 0.203, + "args": { + "External id": 293246,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989230.429, "dur": 0.200, + "args": { + "External id": 293247,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989231.398, "dur": 0.206, + "args": { + "External id": 293248,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989232.369, "dur": 0.227, + "args": { + "External id": 293249,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989233.401, "dur": 0.208, + "args": { + "External id": 293250,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989234.371, "dur": 0.203, + "args": { + "External id": 293251,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989235.379, "dur": 0.207, + "args": { + "External id": 293252,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989238.185, "dur": 0.224, + "args": { + "External id": 293253,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989239.186, "dur": 0.202, + "args": { + "External id": 293254,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989240.210, "dur": 0.200, + "args": { + "External id": 293255,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989241.283, "dur": 0.207, + "args": { + "External id": 293256,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368989242.304, "dur": 0.205, + "args": { + "External id": 293257,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070554, "tid": 2070554, + "ts": 5333368989300.693, "dur": 1698.982, + "args": { + "External id": 293258,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070554, "tid": 2070554, + "ts": 5333368989770.882, "dur": 1152.056, + "args": { + "External id": 293259,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989781.764, "dur": 8.684, + "args": { + "External id": 293260,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989786.463, "dur": 3.234, + "args": { + "External id": 293261,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989791.014, "dur": 3.827, + "args": { + "External id": 293262,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989792.036, "dur": 2.668, + "args": { + "External id": 293263,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989795.317, "dur": 2.904, + "args": { + "External id": 293264,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989797.306, "dur": 0.828, + "args": { + "External id": 293265,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989800.901, "dur": 1.467, + "args": { + "External id": 293266,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989801.499, "dur": 0.802, + "args": { + "External id": 293267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989802.623, "dur": 4.977, + "args": { + "External id": 293268,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989807.157, "dur": 0.374, + "args": { + "External id": 293269,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989808.030, "dur": 1.524, + "args": { + "External id": 293270,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989808.677, "dur": 0.811, + "args": { + "External id": 293271,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989810.206, "dur": 1.128, + "args": { + "External id": 293272,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989810.570, "dur": 0.696, + "args": { + "External id": 293273,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989811.958, "dur": 3.368, + "args": { + "External id": 293274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989814.754, "dur": 0.505, + "args": { + "External id": 293275,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989815.793, "dur": 0.900, + "args": { + "External id": 293276,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989816.219, "dur": 0.407, + "args": { + "External id": 293277,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989816.961, "dur": 3.271, + "args": { + "External id": 293278,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989817.324, "dur": 2.835, + "args": { + "External id": 293279,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989820.477, "dur": 2.171, + "args": { + "External id": 293280,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989822.134, "dur": 0.446, + "args": { + "External id": 293281,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989824.940, "dur": 1.142, + "args": { + "External id": 293282,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989825.373, "dur": 0.617, + "args": { + "External id": 293283,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989826.310, "dur": 4.144, + "args": { + "External id": 293284,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989829.906, "dur": 0.484, + "args": { + "External id": 293285,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989830.918, "dur": 0.968, + "args": { + "External id": 293286,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989831.289, "dur": 0.528, + "args": { + "External id": 293287,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989832.301, "dur": 1.766, + "args": { + "External id": 293288,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989832.879, "dur": 1.121, + "args": { + "External id": 293289,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989834.630, "dur": 3.949, + "args": { + "External id": 293290,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989837.950, "dur": 0.563, + "args": { + "External id": 293291,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989838.805, "dur": 1.662, + "args": { + "External id": 293292,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989839.523, "dur": 0.878, + "args": { + "External id": 293293,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989840.864, "dur": 2.857, + "args": { + "External id": 293294,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989841.459, "dur": 2.192, + "args": { + "External id": 293295,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989844.159, "dur": 2.458, + "args": { + "External id": 293296,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989845.704, "dur": 0.849, + "args": { + "External id": 293297,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989848.873, "dur": 1.163, + "args": { + "External id": 293298,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989849.292, "dur": 0.678, + "args": { + "External id": 293299,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989850.298, "dur": 4.542, + "args": { + "External id": 293300,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989854.084, "dur": 0.692, + "args": { + "External id": 293301,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989855.108, "dur": 1.306, + "args": { + "External id": 293302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989855.730, "dur": 0.617, + "args": { + "External id": 293303,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989856.814, "dur": 1.729, + "args": { + "External id": 293304,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989857.460, "dur": 1.016, + "args": { + "External id": 293305,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989858.940, "dur": 3.470, + "args": { + "External id": 293306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989861.916, "dur": 0.427, + "args": { + "External id": 293307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989862.636, "dur": 1.344, + "args": { + "External id": 293308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989863.009, "dur": 0.905, + "args": { + "External id": 293309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989864.221, "dur": 3.272, + "args": { + "External id": 293310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989864.754, "dur": 2.499, + "args": { + "External id": 293311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989867.762, "dur": 2.512, + "args": { + "External id": 293312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989869.480, "dur": 0.726, + "args": { + "External id": 293313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989872.352, "dur": 1.210, + "args": { + "External id": 293314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989872.792, "dur": 0.706, + "args": { + "External id": 293315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989873.820, "dur": 4.483, + "args": { + "External id": 293316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989877.653, "dur": 0.588, + "args": { + "External id": 293317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989878.717, "dur": 1.506, + "args": { + "External id": 293318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989879.306, "dur": 0.851, + "args": { + "External id": 293319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989880.748, "dur": 1.421, + "args": { + "External id": 293320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989881.322, "dur": 0.779, + "args": { + "External id": 293321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989882.414, "dur": 3.079, + "args": { + "External id": 293322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989885.012, "dur": 0.411, + "args": { + "External id": 293323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989885.800, "dur": 1.384, + "args": { + "External id": 293324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989886.167, "dur": 0.952, + "args": { + "External id": 293325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989887.618, "dur": 3.007, + "args": { + "External id": 293326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989888.221, "dur": 2.151, + "args": { + "External id": 293327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989891.084, "dur": 2.296, + "args": { + "External id": 293328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989892.866, "dur": 0.447, + "args": { + "External id": 293329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989895.268, "dur": 0.919, + "args": { + "External id": 293330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989895.625, "dur": 0.494, + "args": { + "External id": 293331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989896.595, "dur": 3.917, + "args": { + "External id": 293332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989899.718, "dur": 0.729, + "args": { + "External id": 293333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989901.012, "dur": 1.491, + "args": { + "External id": 293334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989901.595, "dur": 0.815, + "args": { + "External id": 293335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989902.946, "dur": 1.431, + "args": { + "External id": 293336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989903.613, "dur": 0.694, + "args": { + "External id": 293337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989904.610, "dur": 3.870, + "args": { + "External id": 293338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989907.611, "dur": 0.799, + "args": { + "External id": 293339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989908.737, "dur": 2.204, + "args": { + "External id": 293340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989909.933, "dur": 0.942, + "args": { + "External id": 293341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989911.194, "dur": 4.443, + "args": { + "External id": 293342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989912.445, "dur": 2.813, + "args": { + "External id": 293343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989915.897, "dur": 1.775, + "args": { + "External id": 293344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989916.985, "dur": 0.618, + "args": { + "External id": 293345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989920.039, "dur": 1.632, + "args": { + "External id": 293346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989920.812, "dur": 0.793, + "args": { + "External id": 293347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989922.298, "dur": 3.634, + "args": { + "External id": 293348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989925.210, "dur": 0.657, + "args": { + "External id": 293349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989926.389, "dur": 1.859, + "args": { + "External id": 293350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989927.468, "dur": 0.721, + "args": { + "External id": 293351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989928.501, "dur": 1.806, + "args": { + "External id": 293352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989929.637, "dur": 0.604, + "args": { + "External id": 293353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989930.771, "dur": 4.018, + "args": { + "External id": 293354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989934.245, "dur": 0.475, + "args": { + "External id": 293355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989935.055, "dur": 2.160, + "args": { + "External id": 293356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989936.362, "dur": 0.776, + "args": { + "External id": 293357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989937.652, "dur": 3.222, + "args": { + "External id": 293358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989938.324, "dur": 2.479, + "args": { + "External id": 293359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989941.314, "dur": 1.759, + "args": { + "External id": 293360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989942.271, "dur": 0.735, + "args": { + "External id": 293361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989945.257, "dur": 1.843, + "args": { + "External id": 293362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989946.274, "dur": 0.757, + "args": { + "External id": 293363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989947.796, "dur": 4.127, + "args": { + "External id": 293364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989950.960, "dur": 0.897, + "args": { + "External id": 293365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989952.364, "dur": 1.983, + "args": { + "External id": 293366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989953.424, "dur": 0.856, + "args": { + "External id": 293367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989954.600, "dur": 2.069, + "args": { + "External id": 293368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989955.373, "dur": 1.230, + "args": { + "External id": 293369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989956.920, "dur": 3.832, + "args": { + "External id": 293370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989959.756, "dur": 0.927, + "args": { + "External id": 293371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989961.218, "dur": 1.745, + "args": { + "External id": 293372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989962.177, "dur": 0.723, + "args": { + "External id": 293373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989963.390, "dur": 3.344, + "args": { + "External id": 293374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368989964.235, "dur": 2.428, + "args": { + "External id": 293375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368989967.000, "dur": 37.227, + "args": { + "External id": 293376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990003.308, "dur": 0.774, + "args": { + "External id": 298497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990006.634, "dur": 3.211, + "args": { + "External id": 298498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990007.397, "dur": 2.378, + "args": { + "External id": 298499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990010.130, "dur": 1.778, + "args": { + "External id": 298500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990011.340, "dur": 0.502, + "args": { + "External id": 298501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990012.598, "dur": 1.745, + "args": { + "External id": 298502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990013.270, "dur": 1.006, + "args": { + "External id": 298503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990014.777, "dur": 3.908, + "args": { + "External id": 298504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990017.667, "dur": 0.950, + "args": { + "External id": 298505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990018.961, "dur": 1.507, + "args": { + "External id": 298506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990019.750, "dur": 0.652, + "args": { + "External id": 298507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990020.851, "dur": 7.248, + "args": { + "External id": 298508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990025.588, "dur": 2.438, + "args": { + "External id": 298509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990028.417, "dur": 1.730, + "args": { + "External id": 298510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990029.084, "dur": 1.000, + "args": { + "External id": 298511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990030.638, "dur": 1.768, + "args": { + "External id": 298512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990031.475, "dur": 0.862, + "args": { + "External id": 298513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990034.270, "dur": 2.913, + "args": { + "External id": 298514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990036.519, "dur": 0.597, + "args": { + "External id": 298515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990037.491, "dur": 1.482, + "args": { + "External id": 298516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990038.211, "dur": 0.691, + "args": { + "External id": 298517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990039.225, "dur": 1.590, + "args": { + "External id": 298518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990039.902, "dur": 0.846, + "args": { + "External id": 298519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990041.274, "dur": 3.258, + "args": { + "External id": 298520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990043.834, "dur": 0.630, + "args": { + "External id": 298521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990044.787, "dur": 1.698, + "args": { + "External id": 298522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990045.608, "dur": 0.813, + "args": { + "External id": 298523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990046.738, "dur": 3.398, + "args": { + "External id": 298524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990047.435, "dur": 2.455, + "args": { + "External id": 298525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990050.422, "dur": 1.610, + "args": { + "External id": 298526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990051.331, "dur": 0.637, + "args": { + "External id": 298527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990052.322, "dur": 1.854, + "args": { + "External id": 298528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990053.146, "dur": 0.962, + "args": { + "External id": 298529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990056.379, "dur": 3.010, + "args": { + "External id": 298530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990058.786, "dur": 0.542, + "args": { + "External id": 298531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990059.694, "dur": 1.626, + "args": { + "External id": 298532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990060.460, "dur": 0.795, + "args": { + "External id": 298533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990061.779, "dur": 1.520, + "args": { + "External id": 298534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990062.509, "dur": 0.721, + "args": { + "External id": 298535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990063.568, "dur": 3.720, + "args": { + "External id": 298536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990066.555, "dur": 0.664, + "args": { + "External id": 298537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990067.545, "dur": 1.429, + "args": { + "External id": 298538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990068.212, "dur": 0.694, + "args": { + "External id": 298539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990069.226, "dur": 3.043, + "args": { + "External id": 298540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990069.887, "dur": 2.307, + "args": { + "External id": 298541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990072.558, "dur": 1.264, + "args": { + "External id": 298542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990073.093, "dur": 0.661, + "args": { + "External id": 298543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990074.296, "dur": 1.966, + "args": { + "External id": 298544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990075.102, "dur": 1.090, + "args": { + "External id": 298545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990078.437, "dur": 4.148, + "args": { + "External id": 298546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990081.579, "dur": 0.938, + "args": { + "External id": 298547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990082.901, "dur": 1.244, + "args": { + "External id": 298548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990083.536, "dur": 0.539, + "args": { + "External id": 298549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990084.427, "dur": 1.064, + "args": { + "External id": 298550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990085.008, "dur": 0.414, + "args": { + "External id": 298551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990086.007, "dur": 3.654, + "args": { + "External id": 298552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990088.988, "dur": 0.607, + "args": { + "External id": 298553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990089.946, "dur": 1.426, + "args": { + "External id": 298554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990090.532, "dur": 0.774, + "args": { + "External id": 298555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990091.632, "dur": 3.155, + "args": { + "External id": 298556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990092.265, "dur": 2.265, + "args": { + "External id": 298557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990095.072, "dur": 1.401, + "args": { + "External id": 298558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990095.770, "dur": 0.636, + "args": { + "External id": 298559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990096.724, "dur": 1.764, + "args": { + "External id": 298560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990097.413, "dur": 1.005, + "args": { + "External id": 298561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990100.705, "dur": 3.313, + "args": { + "External id": 298562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990103.231, "dur": 0.718, + "args": { + "External id": 298563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990104.297, "dur": 1.393, + "args": { + "External id": 298564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990104.781, "dur": 0.842, + "args": { + "External id": 298565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990106.035, "dur": 1.280, + "args": { + "External id": 298566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990106.514, "dur": 0.733, + "args": { + "External id": 298567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990107.587, "dur": 3.223, + "args": { + "External id": 298568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990109.887, "dur": 0.855, + "args": { + "External id": 298569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990111.080, "dur": 1.267, + "args": { + "External id": 298570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990111.609, "dur": 0.674, + "args": { + "External id": 298571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990112.601, "dur": 2.989, + "args": { + "External id": 298572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990113.459, "dur": 2.061, + "args": { + "External id": 298573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990115.846, "dur": 1.705, + "args": { + "External id": 298574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990116.796, "dur": 0.688, + "args": { + "External id": 298575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990117.830, "dur": 1.880, + "args": { + "External id": 298576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990118.905, "dur": 0.741, + "args": { + "External id": 298577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990121.772, "dur": 3.531, + "args": { + "External id": 298578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990124.643, "dur": 0.596, + "args": { + "External id": 298579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990125.595, "dur": 1.345, + "args": { + "External id": 298580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990126.283, "dur": 0.590, + "args": { + "External id": 298581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990127.210, "dur": 1.494, + "args": { + "External id": 298582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990127.925, "dur": 0.714, + "args": { + "External id": 298583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990128.964, "dur": 3.484, + "args": { + "External id": 298584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990131.607, "dur": 0.774, + "args": { + "External id": 298585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990132.710, "dur": 1.622, + "args": { + "External id": 298586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990133.614, "dur": 0.651, + "args": { + "External id": 298587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990134.585, "dur": 3.982, + "args": { + "External id": 298588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990135.943, "dur": 2.491, + "args": { + "External id": 298589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990140.905, "dur": 1.538, + "args": { + "External id": 298590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990141.750, "dur": 0.624, + "args": { + "External id": 298591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990143.115, "dur": 3.989, + "args": { + "External id": 298592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990144.430, "dur": 2.588, + "args": { + "External id": 298593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990149.747, "dur": 1.915, + "args": { + "External id": 298594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990150.831, "dur": 0.764, + "args": { + "External id": 298595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990151.913, "dur": 1.932, + "args": { + "External id": 298596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990152.783, "dur": 0.999, + "args": { + "External id": 298597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990154.104, "dur": 3.950, + "args": { + "External id": 298598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990157.013, "dur": 0.977, + "args": { + "External id": 298599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990158.378, "dur": 1.475, + "args": { + "External id": 298600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990159.103, "dur": 0.679, + "args": { + "External id": 298601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990160.110, "dur": 2.092, + "args": { + "External id": 298602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990161.411, "dur": 0.727, + "args": { + "External id": 298603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990162.650, "dur": 16.161, + "args": { + "External id": 298604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990177.524, "dur": 0.894, + "args": { + "External id": 298605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990179.529, "dur": 1.749, + "args": { + "External id": 298606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990180.712, "dur": 0.500, + "args": { + "External id": 298607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990181.569, "dur": 3.178, + "args": { + "External id": 298608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990182.335, "dur": 2.161, + "args": { + "External id": 298609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990186.742, "dur": 1.987, + "args": { + "External id": 298610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990187.801, "dur": 0.860, + "args": { + "External id": 298611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990189.001, "dur": 2.040, + "args": { + "External id": 298612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990190.027, "dur": 0.938, + "args": { + "External id": 298613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990191.308, "dur": 3.472, + "args": { + "External id": 298614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990194.299, "dur": 0.409, + "args": { + "External id": 298615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990195.095, "dur": 1.299, + "args": { + "External id": 298616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990195.722, "dur": 0.602, + "args": { + "External id": 298617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990196.894, "dur": 2.109, + "args": { + "External id": 298618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990198.199, "dur": 0.732, + "args": { + "External id": 298619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990199.290, "dur": 3.688, + "args": { + "External id": 298620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990202.463, "dur": 0.445, + "args": { + "External id": 298621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990203.242, "dur": 1.832, + "args": { + "External id": 298622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990204.127, "dur": 0.873, + "args": { + "External id": 298623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990205.574, "dur": 3.268, + "args": { + "External id": 298624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990206.742, "dur": 2.025, + "args": { + "External id": 298625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990210.927, "dur": 1.877, + "args": { + "External id": 298626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990211.955, "dur": 0.779, + "args": { + "External id": 298627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990213.093, "dur": 1.711, + "args": { + "External id": 298628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990214.058, "dur": 0.673, + "args": { + "External id": 298629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990215.065, "dur": 3.788, + "args": { + "External id": 298630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990218.002, "dur": 0.785, + "args": { + "External id": 298631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990219.140, "dur": 1.714, + "args": { + "External id": 298632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990220.140, "dur": 0.645, + "args": { + "External id": 298633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990221.116, "dur": 1.389, + "args": { + "External id": 298634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990221.780, "dur": 0.655, + "args": { + "External id": 298635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990222.801, "dur": 3.524, + "args": { + "External id": 298636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990225.632, "dur": 0.625, + "args": { + "External id": 298637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990226.586, "dur": 1.626, + "args": { + "External id": 298638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990227.482, "dur": 0.663, + "args": { + "External id": 298639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990228.641, "dur": 3.661, + "args": { + "External id": 298640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990229.354, "dur": 2.853, + "args": { + "External id": 298641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990234.449, "dur": 1.489, + "args": { + "External id": 298642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990235.345, "dur": 0.523, + "args": { + "External id": 298643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990236.197, "dur": 2.081, + "args": { + "External id": 298644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990237.473, "dur": 0.737, + "args": { + "External id": 298645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990238.780, "dur": 4.166, + "args": { + "External id": 298646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990242.053, "dur": 0.823, + "args": { + "External id": 298647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990243.306, "dur": 1.332, + "args": { + "External id": 298648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990244.022, "dur": 0.549, + "args": { + "External id": 298649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990244.897, "dur": 1.765, + "args": { + "External id": 298650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990246.108, "dur": 0.487, + "args": { + "External id": 298651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990246.944, "dur": 3.929, + "args": { + "External id": 298652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990249.961, "dur": 0.843, + "args": { + "External id": 298653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990251.133, "dur": 1.711, + "args": { + "External id": 298654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990251.986, "dur": 0.791, + "args": { + "External id": 298655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990253.108, "dur": 3.325, + "args": { + "External id": 298656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990254.193, "dur": 2.083, + "args": { + "External id": 298657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990258.770, "dur": 1.383, + "args": { + "External id": 298658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990259.432, "dur": 0.656, + "args": { + "External id": 298659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990260.456, "dur": 1.588, + "args": { + "External id": 298660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990261.279, "dur": 0.698, + "args": { + "External id": 298661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990262.304, "dur": 4.206, + "args": { + "External id": 298662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990265.777, "dur": 0.665, + "args": { + "External id": 298663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990266.793, "dur": 1.504, + "args": { + "External id": 298664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990267.457, "dur": 0.774, + "args": { + "External id": 298665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990268.754, "dur": 1.738, + "args": { + "External id": 298666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990269.544, "dur": 0.881, + "args": { + "External id": 298667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990270.753, "dur": 3.181, + "args": { + "External id": 298668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990273.260, "dur": 0.608, + "args": { + "External id": 298669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990274.200, "dur": 1.906, + "args": { + "External id": 298670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990275.316, "dur": 0.724, + "args": { + "External id": 298671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990276.369, "dur": 3.450, + "args": { + "External id": 298672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990277.269, "dur": 2.480, + "args": { + "External id": 298673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990281.620, "dur": 2.283, + "args": { + "External id": 298674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990282.895, "dur": 0.944, + "args": { + "External id": 298675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990284.233, "dur": 1.702, + "args": { + "External id": 298676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990284.850, "dur": 1.016, + "args": { + "External id": 298677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990286.237, "dur": 4.318, + "args": { + "External id": 298678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990289.729, "dur": 0.766, + "args": { + "External id": 298679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990290.835, "dur": 1.562, + "args": { + "External id": 298680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990291.870, "dur": 0.462, + "args": { + "External id": 298681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990292.864, "dur": 1.944, + "args": { + "External id": 298682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990294.036, "dur": 0.704, + "args": { + "External id": 298683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990295.242, "dur": 3.412, + "args": { + "External id": 298684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990297.701, "dur": 0.888, + "args": { + "External id": 298685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990299.117, "dur": 1.688, + "args": { + "External id": 298686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990300.141, "dur": 0.598, + "args": { + "External id": 298687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990301.066, "dur": 3.203, + "args": { + "External id": 298688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990302.032, "dur": 2.167, + "args": { + "External id": 298689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990306.384, "dur": 1.882, + "args": { + "External id": 298690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990307.317, "dur": 0.884, + "args": { + "External id": 298691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990308.528, "dur": 2.243, + "args": { + "External id": 298692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990309.610, "dur": 1.094, + "args": { + "External id": 298693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990311.034, "dur": 4.563, + "args": { + "External id": 298694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990314.665, "dur": 0.861, + "args": { + "External id": 298695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990316.129, "dur": 1.630, + "args": { + "External id": 298696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990317.197, "dur": 0.491, + "args": { + "External id": 298697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990318.233, "dur": 1.677, + "args": { + "External id": 298698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990319.171, "dur": 0.669, + "args": { + "External id": 298699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990320.179, "dur": 4.368, + "args": { + "External id": 298700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990323.677, "dur": 0.804, + "args": { + "External id": 298701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990324.820, "dur": 1.898, + "args": { + "External id": 298702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990325.775, "dur": 0.877, + "args": { + "External id": 298703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990326.974, "dur": 3.010, + "args": { + "External id": 298704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990327.610, "dur": 2.292, + "args": { + "External id": 298705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990332.532, "dur": 2.113, + "args": { + "External id": 298706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990333.777, "dur": 0.804, + "args": { + "External id": 298707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990334.907, "dur": 1.670, + "args": { + "External id": 298708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990335.861, "dur": 0.647, + "args": { + "External id": 298709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990336.942, "dur": 3.922, + "args": { + "External id": 298710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990340.045, "dur": 0.749, + "args": { + "External id": 298711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990341.157, "dur": 2.028, + "args": { + "External id": 298712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990342.540, "dur": 0.575, + "args": { + "External id": 298713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990343.455, "dur": 1.526, + "args": { + "External id": 298714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990344.390, "dur": 0.521, + "args": { + "External id": 298715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990345.258, "dur": 3.583, + "args": { + "External id": 298716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990348.083, "dur": 0.696, + "args": { + "External id": 298717,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990349.132, "dur": 1.787, + "args": { + "External id": 298718,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990350.053, "dur": 0.804, + "args": { + "External id": 298719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990351.204, "dur": 2.891, + "args": { + "External id": 298720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990352.147, "dur": 1.874, + "args": { + "External id": 298721,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990356.277, "dur": 1.491, + "args": { + "External id": 298722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990357.040, "dur": 0.657, + "args": { + "External id": 298723,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990358.025, "dur": 1.845, + "args": { + "External id": 298724,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990358.975, "dur": 0.826, + "args": { + "External id": 298725,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990360.334, "dur": 4.061, + "args": { + "External id": 298726,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990363.928, "dur": 0.400, + "args": { + "External id": 298727,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990364.785, "dur": 1.691, + "args": { + "External id": 298728,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990365.839, "dur": 0.571, + "args": { + "External id": 298729,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990366.746, "dur": 1.661, + "args": { + "External id": 298730,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990367.460, "dur": 0.876, + "args": { + "External id": 298731,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990368.669, "dur": 3.659, + "args": { + "External id": 298732,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990371.407, "dur": 0.854, + "args": { + "External id": 298733,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990372.592, "dur": 1.677, + "args": { + "External id": 298734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990373.530, "dur": 0.669, + "args": { + "External id": 298735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990374.537, "dur": 3.812, + "args": { + "External id": 298736,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990375.730, "dur": 2.373, + "args": { + "External id": 298737,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990380.569, "dur": 1.918, + "args": { + "External id": 298738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990381.741, "dur": 0.674, + "args": { + "External id": 298739,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990382.763, "dur": 1.676, + "args": { + "External id": 298740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990383.404, "dur": 0.966, + "args": { + "External id": 298741,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990384.896, "dur": 3.944, + "args": { + "External id": 298742,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990388.002, "dur": 0.773, + "args": { + "External id": 298743,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990389.123, "dur": 1.681, + "args": { + "External id": 298744,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990390.071, "dur": 0.666, + "args": { + "External id": 298745,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990391.068, "dur": 1.831, + "args": { + "External id": 298746,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990391.990, "dur": 0.840, + "args": { + "External id": 298747,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990393.158, "dur": 3.036, + "args": { + "External id": 298748,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990395.494, "dur": 0.631, + "args": { + "External id": 298749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990396.481, "dur": 2.029, + "args": { + "External id": 298750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990397.850, "dur": 0.592, + "args": { + "External id": 298751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990398.775, "dur": 2.879, + "args": { + "External id": 298752,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990399.470, "dur": 2.108, + "args": { + "External id": 298753,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990403.594, "dur": 1.783, + "args": { + "External id": 298754,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990404.752, "dur": 0.559, + "args": { + "External id": 298755,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990405.634, "dur": 1.426, + "args": { + "External id": 298756,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990406.365, "dur": 0.629, + "args": { + "External id": 298757,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990407.642, "dur": 4.131, + "args": { + "External id": 298758,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990410.878, "dur": 0.825, + "args": { + "External id": 298759,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990412.055, "dur": 1.712, + "args": { + "External id": 298760,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990412.745, "dur": 0.953, + "args": { + "External id": 298761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990414.205, "dur": 2.000, + "args": { + "External id": 298762,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990415.258, "dur": 0.878, + "args": { + "External id": 298763,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990416.487, "dur": 3.193, + "args": { + "External id": 298764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990419.095, "dur": 0.518, + "args": { + "External id": 298765,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990420.011, "dur": 1.800, + "args": { + "External id": 298766,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990420.948, "dur": 0.800, + "args": { + "External id": 298767,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990422.309, "dur": 3.477, + "args": { + "External id": 298768,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990423.431, "dur": 2.101, + "args": { + "External id": 298769,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990427.919, "dur": 1.758, + "args": { + "External id": 298770,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990428.883, "dur": 0.727, + "args": { + "External id": 298771,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990430.025, "dur": 1.854, + "args": { + "External id": 298772,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990430.900, "dur": 0.912, + "args": { + "External id": 298773,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990432.142, "dur": 3.774, + "args": { + "External id": 298774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990435.052, "dur": 0.800, + "args": { + "External id": 298775,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990436.228, "dur": 1.708, + "args": { + "External id": 298776,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990437.012, "dur": 0.847, + "args": { + "External id": 298777,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990438.366, "dur": 1.817, + "args": { + "External id": 298778,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990439.358, "dur": 0.753, + "args": { + "External id": 298779,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990440.443, "dur": 3.422, + "args": { + "External id": 298780,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990443.020, "dur": 0.773, + "args": { + "External id": 298781,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990444.203, "dur": 1.895, + "args": { + "External id": 298782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990445.473, "dur": 0.551, + "args": { + "External id": 298783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990446.355, "dur": 3.476, + "args": { + "External id": 298784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990447.142, "dur": 2.616, + "args": { + "External id": 298785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990451.767, "dur": 2.244, + "args": { + "External id": 298786,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990453.209, "dur": 0.733, + "args": { + "External id": 298787,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990454.285, "dur": 1.753, + "args": { + "External id": 298788,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990455.189, "dur": 0.783, + "args": { + "External id": 298789,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990456.326, "dur": 4.730, + "args": { + "External id": 298790,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990459.781, "dur": 1.203, + "args": { + "External id": 298791,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990461.404, "dur": 1.659, + "args": { + "External id": 298792,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990462.341, "dur": 0.654, + "args": { + "External id": 298793,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990463.329, "dur": 1.801, + "args": { + "External id": 298794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990464.287, "dur": 0.776, + "args": { + "External id": 298795,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990465.573, "dur": 3.723, + "args": { + "External id": 298796,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990468.463, "dur": 0.764, + "args": { + "External id": 298797,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990469.566, "dur": 1.698, + "args": { + "External id": 298798,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990470.411, "dur": 0.780, + "args": { + "External id": 298799,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990471.664, "dur": 2.853, + "args": { + "External id": 298800,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990472.560, "dur": 1.883, + "args": { + "External id": 298801,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990476.399, "dur": 2.415, + "args": { + "External id": 298802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990477.990, "dur": 0.757, + "args": { + "External id": 298803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990479.347, "dur": 1.598, + "args": { + "External id": 298804,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990480.176, "dur": 0.699, + "args": { + "External id": 298805,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990481.207, "dur": 3.227, + "args": { + "External id": 298806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990483.770, "dur": 0.600, + "args": { + "External id": 298807,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990484.728, "dur": 2.059, + "args": { + "External id": 298808,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990485.682, "dur": 1.035, + "args": { + "External id": 298809,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990487.044, "dur": 2.218, + "args": { + "External id": 298810,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990488.064, "dur": 1.125, + "args": { + "External id": 298811,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990489.521, "dur": 3.774, + "args": { + "External id": 298812,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990492.295, "dur": 0.936, + "args": { + "External id": 298813,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990493.555, "dur": 2.160, + "args": { + "External id": 298814,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990494.780, "dur": 0.868, + "args": { + "External id": 298815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990495.975, "dur": 3.731, + "args": { + "External id": 298816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990497.010, "dur": 2.625, + "args": { + "External id": 298817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990501.918, "dur": 1.904, + "args": { + "External id": 298818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990502.994, "dur": 0.764, + "args": { + "External id": 298819,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990504.080, "dur": 1.308, + "args": { + "External id": 298820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990504.728, "dur": 0.590, + "args": { + "External id": 298821,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990505.849, "dur": 4.039, + "args": { + "External id": 298822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990508.860, "dur": 0.960, + "args": { + "External id": 298823,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990510.206, "dur": 1.848, + "args": { + "External id": 298824,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990511.070, "dur": 0.919, + "args": { + "External id": 298825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990512.500, "dur": 2.108, + "args": { + "External id": 298826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990513.440, "dur": 1.102, + "args": { + "External id": 298827,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990515.295, "dur": 3.207, + "args": { + "External id": 298828,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990517.781, "dur": 0.651, + "args": { + "External id": 298829,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990518.766, "dur": 1.634, + "args": { + "External id": 298830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990519.736, "dur": 0.598, + "args": { + "External id": 298831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990520.673, "dur": 3.700, + "args": { + "External id": 298832,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990521.415, "dur": 2.592, + "args": { + "External id": 298833,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990526.569, "dur": 1.863, + "args": { + "External id": 298834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990527.405, "dur": 0.762, + "args": { + "External id": 298835,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990528.906, "dur": 2.535, + "args": { + "External id": 298836,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990530.244, "dur": 0.741, + "args": { + "External id": 298837,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990531.704, "dur": 3.982, + "args": { + "External id": 298838,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990534.786, "dur": 0.833, + "args": { + "External id": 298839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990535.965, "dur": 2.179, + "args": { + "External id": 298840,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990537.048, "dur": 0.799, + "args": { + "External id": 298841,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990538.436, "dur": 1.670, + "args": { + "External id": 298842,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990539.072, "dur": 0.872, + "args": { + "External id": 298843,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990540.395, "dur": 3.623, + "args": { + "External id": 298844,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990542.881, "dur": 0.677, + "args": { + "External id": 298845,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990545.692, "dur": 1.810, + "args": { + "External id": 298846,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990546.819, "dur": 0.615, + "args": { + "External id": 298847,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990547.784, "dur": 3.407, + "args": { + "External id": 298848,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990548.751, "dur": 2.367, + "args": { + "External id": 298849,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990553.154, "dur": 1.522, + "args": { + "External id": 298850,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990553.939, "dur": 0.673, + "args": { + "External id": 298851,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990554.935, "dur": 1.902, + "args": { + "External id": 298852,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990556.036, "dur": 0.730, + "args": { + "External id": 298853,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990557.097, "dur": 3.649, + "args": { + "External id": 298854,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990559.950, "dur": 0.729, + "args": { + "External id": 298855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990561.049, "dur": 1.461, + "args": { + "External id": 298856,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990561.879, "dur": 0.547, + "args": { + "External id": 298857,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990562.770, "dur": 1.980, + "args": { + "External id": 298858,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990563.577, "dur": 1.106, + "args": { + "External id": 298859,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990565.037, "dur": 3.313, + "args": { + "External id": 298860,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990567.706, "dur": 0.577, + "args": { + "External id": 298861,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990568.630, "dur": 1.996, + "args": { + "External id": 298862,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990569.911, "dur": 0.646, + "args": { + "External id": 298863,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990570.907, "dur": 3.485, + "args": { + "External id": 298864,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990571.871, "dur": 2.442, + "args": { + "External id": 298865,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990576.510, "dur": 2.009, + "args": { + "External id": 298866,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990577.598, "dur": 0.657, + "args": { + "External id": 298867,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990578.992, "dur": 2.036, + "args": { + "External id": 298868,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990580.052, "dur": 0.907, + "args": { + "External id": 298869,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990581.288, "dur": 3.840, + "args": { + "External id": 298870,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990584.296, "dur": 0.765, + "args": { + "External id": 298871,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990585.408, "dur": 1.500, + "args": { + "External id": 298872,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990586.090, "dur": 0.656, + "args": { + "External id": 298873,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990587.167, "dur": 1.981, + "args": { + "External id": 298874,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990588.378, "dur": 0.524, + "args": { + "External id": 298875,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990589.408, "dur": 3.004, + "args": { + "External id": 298876,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990591.684, "dur": 0.662, + "args": { + "External id": 298877,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990592.675, "dur": 2.120, + "args": { + "External id": 298878,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990593.890, "dur": 0.598, + "args": { + "External id": 298879,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990595.052, "dur": 3.709, + "args": { + "External id": 298880,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990596.097, "dur": 2.588, + "args": { + "External id": 298881,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990600.685, "dur": 1.524, + "args": { + "External id": 298882,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990601.611, "dur": 0.523, + "args": { + "External id": 298883,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990602.659, "dur": 2.116, + "args": { + "External id": 298884,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990603.569, "dur": 1.045, + "args": { + "External id": 298885,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990605.059, "dur": 4.035, + "args": { + "External id": 298886,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990608.154, "dur": 0.874, + "args": { + "External id": 298887,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990609.396, "dur": 1.463, + "args": { + "External id": 298888,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990610.297, "dur": 0.495, + "args": { + "External id": 298889,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990611.119, "dur": 1.764, + "args": { + "External id": 298890,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990611.952, "dur": 0.864, + "args": { + "External id": 298891,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990613.147, "dur": 3.870, + "args": { + "External id": 298892,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990616.132, "dur": 0.820, + "args": { + "External id": 298893,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990617.304, "dur": 1.810, + "args": { + "External id": 298894,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990618.179, "dur": 0.868, + "args": { + "External id": 298895,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990619.384, "dur": 30.208, + "args": { + "External id": 298896,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990620.052, "dur": 28.433, + "args": { + "External id": 298897,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990652.784, "dur": 2.275, + "args": { + "External id": 298898,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990654.237, "dur": 0.518, + "args": { + "External id": 298899,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990655.610, "dur": 1.821, + "args": { + "External id": 298900,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990656.655, "dur": 0.673, + "args": { + "External id": 298901,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990657.742, "dur": 3.279, + "args": { + "External id": 298902,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990660.324, "dur": 0.632, + "args": { + "External id": 298903,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990661.313, "dur": 1.997, + "args": { + "External id": 298904,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990662.379, "dur": 0.865, + "args": { + "External id": 298905,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990663.582, "dur": 1.696, + "args": { + "External id": 298906,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990664.263, "dur": 0.949, + "args": { + "External id": 298907,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990665.644, "dur": 3.464, + "args": { + "External id": 298908,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990668.078, "dur": 0.773, + "args": { + "External id": 298909,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990669.356, "dur": 2.043, + "args": { + "External id": 298910,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990670.234, "dur": 0.890, + "args": { + "External id": 298911,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990671.806, "dur": 3.178, + "args": { + "External id": 298912,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990672.803, "dur": 2.109, + "args": { + "External id": 298913,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990676.927, "dur": 2.068, + "args": { + "External id": 298914,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990678.082, "dur": 0.853, + "args": { + "External id": 298915,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990679.287, "dur": 2.435, + "args": { + "External id": 298916,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990680.348, "dur": 1.121, + "args": { + "External id": 298917,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990682.002, "dur": 4.193, + "args": { + "External id": 298918,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990685.321, "dur": 0.813, + "args": { + "External id": 298919,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990686.524, "dur": 1.772, + "args": { + "External id": 298920,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990687.475, "dur": 0.755, + "args": { + "External id": 298921,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990688.554, "dur": 2.272, + "args": { + "External id": 298922,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990689.495, "dur": 1.043, + "args": { + "External id": 298923,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990691.105, "dur": 3.845, + "args": { + "External id": 298924,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990693.897, "dur": 0.785, + "args": { + "External id": 298925,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990695.208, "dur": 1.953, + "args": { + "External id": 298926,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990696.498, "dur": 0.594, + "args": { + "External id": 298927,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990697.434, "dur": 3.336, + "args": { + "External id": 298928,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990698.378, "dur": 2.145, + "args": { + "External id": 298929,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990702.678, "dur": 1.497, + "args": { + "External id": 298930,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990703.396, "dur": 0.711, + "args": { + "External id": 298931,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990704.463, "dur": 1.988, + "args": { + "External id": 298932,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990705.451, "dur": 0.931, + "args": { + "External id": 298933,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990706.693, "dur": 4.864, + "args": { + "External id": 298934,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990710.634, "dur": 0.853, + "args": { + "External id": 298935,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990711.827, "dur": 2.006, + "args": { + "External id": 298936,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990712.926, "dur": 0.837, + "args": { + "External id": 298937,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990714.073, "dur": 1.772, + "args": { + "External id": 298938,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990715.062, "dur": 0.713, + "args": { + "External id": 298939,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990716.087, "dur": 3.591, + "args": { + "External id": 298940,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990718.903, "dur": 0.709, + "args": { + "External id": 298941,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990719.977, "dur": 1.803, + "args": { + "External id": 298942,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990721.077, "dur": 0.639, + "args": { + "External id": 298943,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990722.053, "dur": 3.271, + "args": { + "External id": 298944,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990722.966, "dur": 2.285, + "args": { + "External id": 298945,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990727.965, "dur": 2.004, + "args": { + "External id": 298946,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990728.920, "dur": 0.986, + "args": { + "External id": 298947,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990730.434, "dur": 2.044, + "args": { + "External id": 298948,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990731.369, "dur": 1.037, + "args": { + "External id": 298949,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990732.743, "dur": 3.437, + "args": { + "External id": 298950,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990735.331, "dur": 0.780, + "args": { + "External id": 298951,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990736.497, "dur": 1.331, + "args": { + "External id": 298952,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990737.240, "dur": 0.521, + "args": { + "External id": 298953,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990738.082, "dur": 2.310, + "args": { + "External id": 298954,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990739.343, "dur": 0.979, + "args": { + "External id": 298955,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990740.654, "dur": 3.762, + "args": { + "External id": 298956,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990743.587, "dur": 0.761, + "args": { + "External id": 298957,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990744.686, "dur": 1.998, + "args": { + "External id": 298958,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990745.791, "dur": 0.827, + "args": { + "External id": 298959,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990746.942, "dur": 3.164, + "args": { + "External id": 298960,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990747.938, "dur": 2.091, + "args": { + "External id": 298961,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070554, "tid": 2070554, + "ts": 5333368990752.049, "dur": 1.691, + "args": { + "External id": 298962,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368990752.911, "dur": 0.762, + "args": { + "External id": 298963,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070554, "tid": 2070554, + "ts": 5333368990773.571, "dur": 137.897, + "args": { + "External id": 298964,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070554, "tid": 2070554, + "ts": 5333368991013.678, "dur": 138.779, + "args": { + "External id": 298965,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070554, "tid": 2070554, + "ts": 5333368991077.699, "dur": 54.879, + "args": { + "External id": 298966,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070554, "tid": 2070554, + "ts": 5333368991092.641, "dur": 1.063, + "args": { + "External id": 298967,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2070554, "tid": 2070554, + "ts": 5333368991484.967, "dur": 946.018, + "args": { + "External id": 298968,"Sequence number": 1209237, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070554, "tid": 2070554, + "ts": 5333368991540.854, "dur": 66.914, + "args": { + "External id": 298969,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368991546.171, "dur": 1.774, + "args": { + "External id": 298970,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368991550.584, "dur": 0.591, + "args": { + "External id": 298971,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2070554, "tid": 2070554, + "ts": 5333368991666.940, "dur": 434.664, + "args": { + "External id": 298972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070554, "tid": 2070554, + "ts": 5333368991671.478, "dur": 54.270, + "args": { + "External id": 298973,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070554, "tid": 2070554, + "ts": 5333368991674.893, "dur": 13.955, + "args": { + "External id": 298974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333368991681.970, "dur": 5.853, + "args": { + "External id": 298975,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070554, "tid": 2070554, + "ts": 5333368991690.433, "dur": 34.713, + "args": { + "External id": 298976,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2070554, "tid": 2070554, + "ts": 5333368991736.427, "dur": 361.902, + "args": { + "External id": 298977,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333368991770.677, "dur": 321.948, + "args": { + "External id": 298978,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 7, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "7", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9390, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2070554, "tid": 2070554, + "ts": 5333368991790.035, "dur": 297.122, + "args": { + "External id": 298979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070554, "tid": 2070554, + "ts": 5333368992182.424, "dur": 209.968, + "args": { + "External id": 298980,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2070554, "tid": 2070554, + "ts": 5333368992272.190, "dur": 35.765, + "args": { + "External id": 298981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070554, "tid": 2070554, + "ts": 5333368992292.476, "dur": 5.177, + "args": { + "External id": 298982,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "7", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9394, "In msg nelems": 0, "Rank": 7, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070554, "tid": 2070554, + "ts": 5333368992336.947, "dur": 49.199, + "args": { + "External id": 298983,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368992340.162, "dur": 1.275, + "args": { + "External id": 298984,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368992343.151, "dur": 0.735, + "args": { + "External id": 298985,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2070554, "tid": 2070554, + "ts": 5333368992446.913, "dur": 21.143, + "args": { + "External id": 298986,"Sequence number": 1209238, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070554, "tid": 2070554, + "ts": 5333368992457.025, "dur": 7.989, + "args": { + "External id": 298987,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070554, "tid": 2070554, + "ts": 5333368992459.057, "dur": 5.813, + "args": { + "External id": 298988,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070554, "tid": 2070554, + "ts": 5333368992952.062, "dur": 50.199, + "args": { + "External id": 298989,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2070554, "tid": 2070554, + "ts": 5333368993012.476, "dur": 25.106, + "args": { + "External id": 298990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2070554, "tid": 2070554, + "ts": 5333368993045.840, "dur": 28.442, + "args": { + "External id": 298991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2070554, "tid": 2070554, + "ts": 5333368993088.867, "dur": 25.934, + "args": { + "External id": 298992,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368993092.487, "dur": 0.599, + "args": { + "External id": 298993,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070554, "tid": 2070554, + "ts": 5333368993131.700, "dur": 0.556, + "args": { + "External id": 298994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070554, "tid": 2070554, + "ts": 5333368993267.214, "dur": 743.773, + "args": { + "External id": 298995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070554, "tid": 2070554, + "ts": 5333368993799.612, "dur": 179.104, + "args": { + "External id": 298996,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2070554, "tid": 2070554, + "ts": 5333368994058.805, "dur": 30.158, + "args": { + "External id": 298997,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070554, "tid": 2070554, + "ts": 5333368994063.006, "dur": 25.299, + "args": { + "External id": 298998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070554, "tid": 2070554, + "ts": 5333368994092.842, "dur": 834.336, + "args": { + "External id": 298999,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070554, "tid": 2070554, + "ts": 5333368994094.556, "dur": 832.290, + "args": { + "External id": 299000,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070554, "tid": 2070554, + "ts": 5333368994097.020, "dur": 828.435, + "args": { + "External id": 299001,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2070554, "tid": 2070554, + "ts": 5333368994935.584, "dur": 72.411, + "args": { + "External id": 299002,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333368994939.124, "dur": 39.100, + "args": { + "External id": 299003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070554, "tid": 2070554, + "ts": 5333368994944.631, "dur": 3.729, + "args": { + "External id": 299004,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070554, "tid": 2070554, + "ts": 5333368994950.801, "dur": 27.112, + "args": { + "External id": 299005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070554, "tid": 2070554, + "ts": 5333368994956.228, "dur": 3.174, + "args": { + "External id": 299006,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070554, "tid": 2070554, + "ts": 5333368994979.901, "dur": 27.297, + "args": { + "External id": 299007,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070554, "tid": 2070554, + "ts": 5333368995010.650, "dur": 79.243, + "args": { + "External id": 299008,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070554, "tid": 2070554, + "ts": 5333368995050.168, "dur": 39.563, + "args": { + "External id": 299009,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070554, "tid": 2070554, + "ts": 5333368995051.328, "dur": 38.057, + "args": { + "External id": 299010,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2070554, "tid": 2070554, + "ts": 5333368995123.454, "dur": 5757.522, + "args": { + "External id": 299011,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2070554, "tid": 2070554, + "ts": 5333368995156.789, "dur": 5702.410, + "args": { + "External id": 299012,"Record function id": 0, "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2070554, "tid": 2070554, + "ts": 5333368996441.011, "dur": 279.082, + "args": { + "External id": 299013,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996461.961, "dur": 1.373, + "args": { + "External id": 299014,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996464.452, "dur": 0.048, + "args": { + "External id": 299015,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996465.104, "dur": 0.055, + "args": { + "External id": 299016,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996465.511, "dur": 0.064, + "args": { + "External id": 299017,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996466.203, "dur": 0.052, + "args": { + "External id": 299018,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996466.664, "dur": 0.063, + "args": { + "External id": 299019,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996467.173, "dur": 0.056, + "args": { + "External id": 299020,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996467.651, "dur": 0.070, + "args": { + "External id": 299021,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996468.191, "dur": 0.065, + "args": { + "External id": 299022,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996468.627, "dur": 0.054, + "args": { + "External id": 299023,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996469.095, "dur": 0.068, + "args": { + "External id": 299024,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996469.480, "dur": 0.347, + "args": { + "External id": 299025,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996470.233, "dur": 0.064, + "args": { + "External id": 299026,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996470.580, "dur": 0.057, + "args": { + "External id": 299027,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996470.984, "dur": 0.069, + "args": { + "External id": 299028,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996471.445, "dur": 0.063, + "args": { + "External id": 299029,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996471.836, "dur": 0.059, + "args": { + "External id": 299030,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996472.400, "dur": 0.055, + "args": { + "External id": 299031,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996472.757, "dur": 0.062, + "args": { + "External id": 299032,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996473.275, "dur": 0.060, + "args": { + "External id": 299033,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996473.654, "dur": 0.324, + "args": { + "External id": 299034,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996474.327, "dur": 0.084, + "args": { + "External id": 299035,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996474.766, "dur": 0.054, + "args": { + "External id": 299036,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996475.187, "dur": 0.065, + "args": { + "External id": 299037,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996475.621, "dur": 0.267, + "args": { + "External id": 299038,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996476.216, "dur": 0.089, + "args": { + "External id": 299039,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996476.628, "dur": 0.065, + "args": { + "External id": 299040,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996477.038, "dur": 0.076, + "args": { + "External id": 299041,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996477.608, "dur": 0.067, + "args": { + "External id": 299042,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996478.006, "dur": 0.061, + "args": { + "External id": 299043,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996478.361, "dur": 0.280, + "args": { + "External id": 299044,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996479.033, "dur": 0.055, + "args": { + "External id": 299045,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996479.481, "dur": 0.055, + "args": { + "External id": 299046,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996479.877, "dur": 0.060, + "args": { + "External id": 299047,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996480.276, "dur": 0.057, + "args": { + "External id": 299048,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996480.754, "dur": 0.057, + "args": { + "External id": 299049,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996481.098, "dur": 0.053, + "args": { + "External id": 299050,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996481.510, "dur": 0.067, + "args": { + "External id": 299051,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996481.935, "dur": 0.062, + "args": { + "External id": 299052,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996482.392, "dur": 0.054, + "args": { + "External id": 299053,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996482.793, "dur": 0.056, + "args": { + "External id": 299054,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996483.225, "dur": 0.056, + "args": { + "External id": 299055,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996483.668, "dur": 0.052, + "args": { + "External id": 299056,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996484.114, "dur": 0.059, + "args": { + "External id": 299057,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996484.571, "dur": 0.062, + "args": { + "External id": 299058,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996485.027, "dur": 0.063, + "args": { + "External id": 299059,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996485.489, "dur": 0.062, + "args": { + "External id": 299060,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996485.900, "dur": 0.058, + "args": { + "External id": 299061,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996486.294, "dur": 0.054, + "args": { + "External id": 299062,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996486.846, "dur": 0.057, + "args": { + "External id": 299063,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996487.264, "dur": 0.057, + "args": { + "External id": 299064,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996487.596, "dur": 0.055, + "args": { + "External id": 299065,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996487.983, "dur": 0.050, + "args": { + "External id": 299066,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996488.455, "dur": 0.062, + "args": { + "External id": 299067,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996488.882, "dur": 0.255, + "args": { + "External id": 299068,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996489.504, "dur": 0.065, + "args": { + "External id": 299069,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996489.891, "dur": 0.279, + "args": { + "External id": 299070,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996490.533, "dur": 0.053, + "args": { + "External id": 299071,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996490.957, "dur": 0.063, + "args": { + "External id": 299072,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996491.447, "dur": 0.060, + "args": { + "External id": 299073,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996491.989, "dur": 0.243, + "args": { + "External id": 299074,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996492.636, "dur": 0.056, + "args": { + "External id": 299075,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996493.044, "dur": 0.249, + "args": { + "External id": 299076,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996493.891, "dur": 0.075, + "args": { + "External id": 299077,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996494.323, "dur": 0.071, + "args": { + "External id": 299078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996494.818, "dur": 0.056, + "args": { + "External id": 299079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996495.255, "dur": 0.056, + "args": { + "External id": 299080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996495.946, "dur": 0.057, + "args": { + "External id": 299081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996496.360, "dur": 0.057, + "args": { + "External id": 299082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996496.806, "dur": 0.056, + "args": { + "External id": 299083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996497.453, "dur": 0.053, + "args": { + "External id": 299084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996497.864, "dur": 0.057, + "args": { + "External id": 299085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996498.291, "dur": 0.059, + "args": { + "External id": 299086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996498.730, "dur": 0.050, + "args": { + "External id": 299087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996499.121, "dur": 0.056, + "args": { + "External id": 299088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996499.579, "dur": 0.056, + "args": { + "External id": 299089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996499.975, "dur": 0.063, + "args": { + "External id": 299090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996500.345, "dur": 0.054, + "args": { + "External id": 299091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996500.813, "dur": 0.052, + "args": { + "External id": 299092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996501.244, "dur": 0.058, + "args": { + "External id": 299093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996501.679, "dur": 0.047, + "args": { + "External id": 299094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996502.041, "dur": 0.057, + "args": { + "External id": 299095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996502.477, "dur": 0.054, + "args": { + "External id": 299096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996502.867, "dur": 0.052, + "args": { + "External id": 299097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996503.340, "dur": 0.291, + "args": { + "External id": 299098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996504.013, "dur": 0.291, + "args": { + "External id": 299099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996504.692, "dur": 0.063, + "args": { + "External id": 299100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996505.161, "dur": 0.100, + "args": { + "External id": 299101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996505.684, "dur": 0.308, + "args": { + "External id": 299102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996506.295, "dur": 0.060, + "args": { + "External id": 299103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996506.702, "dur": 0.249, + "args": { + "External id": 299104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996507.272, "dur": 0.248, + "args": { + "External id": 299105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996507.886, "dur": 0.082, + "args": { + "External id": 299106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996508.328, "dur": 0.304, + "args": { + "External id": 299107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996508.962, "dur": 0.058, + "args": { + "External id": 299108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996509.401, "dur": 0.246, + "args": { + "External id": 299109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996510.068, "dur": 0.059, + "args": { + "External id": 299110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996510.575, "dur": 0.049, + "args": { + "External id": 299111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996510.980, "dur": 0.235, + "args": { + "External id": 299112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996511.489, "dur": 0.228, + "args": { + "External id": 299113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996512.075, "dur": 0.071, + "args": { + "External id": 299114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996512.543, "dur": 0.270, + "args": { + "External id": 299115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996513.108, "dur": 0.098, + "args": { + "External id": 299116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996513.544, "dur": 0.104, + "args": { + "External id": 299117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996513.972, "dur": 0.053, + "args": { + "External id": 299118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996514.378, "dur": 0.067, + "args": { + "External id": 299119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996514.821, "dur": 0.057, + "args": { + "External id": 299120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996515.226, "dur": 0.053, + "args": { + "External id": 299121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996515.597, "dur": 0.056, + "args": { + "External id": 299122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996516.010, "dur": 0.065, + "args": { + "External id": 299123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996516.462, "dur": 0.066, + "args": { + "External id": 299124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996516.861, "dur": 0.091, + "args": { + "External id": 299125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996517.258, "dur": 0.066, + "args": { + "External id": 299126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996517.632, "dur": 0.068, + "args": { + "External id": 299127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996518.111, "dur": 0.049, + "args": { + "External id": 299128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996518.577, "dur": 0.047, + "args": { + "External id": 299129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996519.027, "dur": 0.060, + "args": { + "External id": 299130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996519.372, "dur": 0.055, + "args": { + "External id": 299131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996519.800, "dur": 0.061, + "args": { + "External id": 299132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996520.138, "dur": 0.070, + "args": { + "External id": 299133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996520.565, "dur": 0.066, + "args": { + "External id": 299134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996521.012, "dur": 0.062, + "args": { + "External id": 299135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996521.400, "dur": 0.058, + "args": { + "External id": 299136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996521.838, "dur": 0.075, + "args": { + "External id": 299137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996522.251, "dur": 0.269, + "args": { + "External id": 299138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996522.829, "dur": 0.057, + "args": { + "External id": 299139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996523.301, "dur": 0.090, + "args": { + "External id": 299140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996523.759, "dur": 0.284, + "args": { + "External id": 299141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996524.416, "dur": 0.235, + "args": { + "External id": 299142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996525.017, "dur": 0.063, + "args": { + "External id": 299143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996525.460, "dur": 0.053, + "args": { + "External id": 299144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996525.867, "dur": 0.057, + "args": { + "External id": 299145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996526.266, "dur": 0.055, + "args": { + "External id": 299146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996526.607, "dur": 0.257, + "args": { + "External id": 299147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996527.272, "dur": 0.054, + "args": { + "External id": 299148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996527.674, "dur": 0.053, + "args": { + "External id": 299149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996528.072, "dur": 0.052, + "args": { + "External id": 299150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996528.395, "dur": 0.055, + "args": { + "External id": 299151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996528.908, "dur": 0.058, + "args": { + "External id": 299152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996529.324, "dur": 0.060, + "args": { + "External id": 299153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996529.799, "dur": 0.054, + "args": { + "External id": 299154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996530.162, "dur": 0.066, + "args": { + "External id": 299155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996530.579, "dur": 0.058, + "args": { + "External id": 299156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996531.046, "dur": 0.046, + "args": { + "External id": 299157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996531.491, "dur": 0.062, + "args": { + "External id": 299158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996531.894, "dur": 0.056, + "args": { + "External id": 299159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996532.288, "dur": 0.052, + "args": { + "External id": 299160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996532.598, "dur": 0.055, + "args": { + "External id": 299161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996533.004, "dur": 0.056, + "args": { + "External id": 299162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996533.375, "dur": 0.056, + "args": { + "External id": 299163,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996533.787, "dur": 0.057, + "args": { + "External id": 299164,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996534.224, "dur": 0.057, + "args": { + "External id": 299165,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996534.648, "dur": 0.053, + "args": { + "External id": 299166,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996535.039, "dur": 0.056, + "args": { + "External id": 299167,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996535.399, "dur": 0.058, + "args": { + "External id": 299168,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996535.782, "dur": 0.056, + "args": { + "External id": 299169,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996536.248, "dur": 0.051, + "args": { + "External id": 299170,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996536.685, "dur": 0.229, + "args": { + "External id": 299171,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996537.259, "dur": 0.058, + "args": { + "External id": 299172,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996537.735, "dur": 0.060, + "args": { + "External id": 299173,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996538.151, "dur": 0.053, + "args": { + "External id": 299174,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996538.557, "dur": 0.080, + "args": { + "External id": 299175,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996538.937, "dur": 0.057, + "args": { + "External id": 299176,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996539.318, "dur": 0.225, + "args": { + "External id": 299177,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996539.878, "dur": 0.075, + "args": { + "External id": 299178,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996540.309, "dur": 0.272, + "args": { + "External id": 299179,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996540.951, "dur": 0.350, + "args": { + "External id": 299180,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996541.719, "dur": 0.311, + "args": { + "External id": 299181,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996542.439, "dur": 0.053, + "args": { + "External id": 299182,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996542.852, "dur": 0.054, + "args": { + "External id": 299183,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996543.261, "dur": 0.049, + "args": { + "External id": 299184,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996549.064, "dur": 0.062, + "args": { + "External id": 299185,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996549.850, "dur": 0.055, + "args": { + "External id": 299186,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996550.267, "dur": 0.052, + "args": { + "External id": 299187,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996550.690, "dur": 0.044, + "args": { + "External id": 299188,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996551.134, "dur": 0.042, + "args": { + "External id": 299189,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996551.556, "dur": 0.053, + "args": { + "External id": 299190,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996551.984, "dur": 0.059, + "args": { + "External id": 299191,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996552.444, "dur": 0.064, + "args": { + "External id": 299192,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996552.872, "dur": 0.053, + "args": { + "External id": 299193,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996553.321, "dur": 0.056, + "args": { + "External id": 299194,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996553.812, "dur": 0.044, + "args": { + "External id": 299195,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996554.133, "dur": 0.057, + "args": { + "External id": 299196,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996554.569, "dur": 0.081, + "args": { + "External id": 299197,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996554.916, "dur": 0.054, + "args": { + "External id": 299198,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996555.308, "dur": 0.056, + "args": { + "External id": 299199,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996555.713, "dur": 0.055, + "args": { + "External id": 299200,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996556.096, "dur": 0.065, + "args": { + "External id": 299201,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996556.501, "dur": 0.058, + "args": { + "External id": 299202,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996556.882, "dur": 0.059, + "args": { + "External id": 299203,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996557.204, "dur": 0.056, + "args": { + "External id": 299204,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996557.516, "dur": 0.057, + "args": { + "External id": 299205,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996557.836, "dur": 0.056, + "args": { + "External id": 299206,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996558.281, "dur": 0.056, + "args": { + "External id": 299207,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996558.697, "dur": 0.055, + "args": { + "External id": 299208,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996559.024, "dur": 0.059, + "args": { + "External id": 299209,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996559.452, "dur": 0.056, + "args": { + "External id": 299210,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996559.869, "dur": 0.054, + "args": { + "External id": 299211,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996560.215, "dur": 0.069, + "args": { + "External id": 299212,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996560.655, "dur": 0.068, + "args": { + "External id": 299213,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996561.254, "dur": 0.060, + "args": { + "External id": 299214,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996561.639, "dur": 0.055, + "args": { + "External id": 299215,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996562.033, "dur": 0.058, + "args": { + "External id": 299216,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996562.352, "dur": 0.059, + "args": { + "External id": 299217,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996562.768, "dur": 0.053, + "args": { + "External id": 299218,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996563.130, "dur": 0.067, + "args": { + "External id": 299219,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996563.480, "dur": 0.055, + "args": { + "External id": 299220,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996563.842, "dur": 0.066, + "args": { + "External id": 299221,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996564.166, "dur": 0.061, + "args": { + "External id": 299222,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996564.580, "dur": 0.047, + "args": { + "External id": 299223,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996564.988, "dur": 0.050, + "args": { + "External id": 299224,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996565.321, "dur": 0.066, + "args": { + "External id": 299225,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996565.725, "dur": 0.066, + "args": { + "External id": 299226,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996566.176, "dur": 0.070, + "args": { + "External id": 299227,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996566.689, "dur": 0.059, + "args": { + "External id": 299228,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996567.095, "dur": 0.057, + "args": { + "External id": 299229,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996567.443, "dur": 0.054, + "args": { + "External id": 299230,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996567.875, "dur": 0.053, + "args": { + "External id": 299231,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996568.298, "dur": 0.051, + "args": { + "External id": 299232,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996568.737, "dur": 0.054, + "args": { + "External id": 299233,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996569.157, "dur": 0.058, + "args": { + "External id": 299234,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996569.477, "dur": 0.058, + "args": { + "External id": 299235,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996569.895, "dur": 0.052, + "args": { + "External id": 299236,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996570.274, "dur": 0.050, + "args": { + "External id": 299237,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996570.659, "dur": 0.056, + "args": { + "External id": 299238,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996571.061, "dur": 0.054, + "args": { + "External id": 299239,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996571.422, "dur": 0.087, + "args": { + "External id": 299240,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996571.808, "dur": 0.304, + "args": { + "External id": 299241,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996572.412, "dur": 0.056, + "args": { + "External id": 299242,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996572.729, "dur": 0.057, + "args": { + "External id": 299243,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996573.044, "dur": 0.059, + "args": { + "External id": 299244,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996573.445, "dur": 0.056, + "args": { + "External id": 299245,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996573.774, "dur": 0.054, + "args": { + "External id": 299246,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996574.106, "dur": 0.055, + "args": { + "External id": 299247,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996574.489, "dur": 0.060, + "args": { + "External id": 299248,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996574.872, "dur": 0.058, + "args": { + "External id": 299249,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996575.268, "dur": 0.053, + "args": { + "External id": 299250,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996575.731, "dur": 0.055, + "args": { + "External id": 299251,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996576.056, "dur": 0.059, + "args": { + "External id": 299252,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996576.496, "dur": 0.070, + "args": { + "External id": 299253,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996576.909, "dur": 0.052, + "args": { + "External id": 299254,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996577.307, "dur": 0.067, + "args": { + "External id": 299255,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996577.755, "dur": 0.242, + "args": { + "External id": 299256,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996578.362, "dur": 0.259, + "args": { + "External id": 299257,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996579.040, "dur": 0.243, + "args": { + "External id": 299258,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996579.575, "dur": 0.239, + "args": { + "External id": 299259,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996580.236, "dur": 0.260, + "args": { + "External id": 299260,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996580.813, "dur": 0.281, + "args": { + "External id": 299261,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996581.447, "dur": 0.264, + "args": { + "External id": 299262,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996582.022, "dur": 0.067, + "args": { + "External id": 299263,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996582.457, "dur": 0.254, + "args": { + "External id": 299264,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996583.013, "dur": 0.059, + "args": { + "External id": 299265,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996583.427, "dur": 0.066, + "args": { + "External id": 299266,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996583.816, "dur": 0.061, + "args": { + "External id": 299267,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996584.209, "dur": 0.066, + "args": { + "External id": 299268,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996584.680, "dur": 0.067, + "args": { + "External id": 299269,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996585.040, "dur": 0.067, + "args": { + "External id": 299270,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996585.491, "dur": 0.069, + "args": { + "External id": 299271,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996586.010, "dur": 0.068, + "args": { + "External id": 299272,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996586.446, "dur": 0.070, + "args": { + "External id": 299273,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996586.900, "dur": 0.059, + "args": { + "External id": 299274,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996587.331, "dur": 0.059, + "args": { + "External id": 299275,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996587.688, "dur": 0.059, + "args": { + "External id": 299276,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996588.080, "dur": 0.053, + "args": { + "External id": 299277,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996588.402, "dur": 0.288, + "args": { + "External id": 299278,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996589.052, "dur": 0.056, + "args": { + "External id": 299279,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996589.486, "dur": 0.051, + "args": { + "External id": 299280,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996589.900, "dur": 0.051, + "args": { + "External id": 299281,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996590.416, "dur": 0.057, + "args": { + "External id": 299282,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996590.796, "dur": 0.242, + "args": { + "External id": 299283,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996591.395, "dur": 0.059, + "args": { + "External id": 299284,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996591.818, "dur": 0.060, + "args": { + "External id": 299285,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996592.282, "dur": 0.266, + "args": { + "External id": 299286,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996592.886, "dur": 0.054, + "args": { + "External id": 299287,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996593.241, "dur": 0.419, + "args": { + "External id": 299288,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996593.928, "dur": 0.377, + "args": { + "External id": 299289,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996594.660, "dur": 0.236, + "args": { + "External id": 299290,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996595.358, "dur": 0.232, + "args": { + "External id": 299291,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996595.873, "dur": 0.365, + "args": { + "External id": 299292,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996596.646, "dur": 0.239, + "args": { + "External id": 299293,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996597.165, "dur": 0.050, + "args": { + "External id": 299294,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996597.536, "dur": 0.057, + "args": { + "External id": 299295,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996597.836, "dur": 0.051, + "args": { + "External id": 299296,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996598.250, "dur": 0.056, + "args": { + "External id": 299297,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996598.544, "dur": 0.050, + "args": { + "External id": 299298,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996603.170, "dur": 0.069, + "args": { + "External id": 299299,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996603.533, "dur": 0.053, + "args": { + "External id": 299300,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996604.124, "dur": 0.058, + "args": { + "External id": 299301,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996604.553, "dur": 0.054, + "args": { + "External id": 299302,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996604.992, "dur": 0.054, + "args": { + "External id": 299303,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996605.389, "dur": 0.053, + "args": { + "External id": 299304,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070554, "tid": 2070554, + "ts": 5333368996605.862, "dur": 0.051, + "args": { + "External id": 299305,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070554, "tid": 2070554, + "ts": 5333368997225.967, "dur": 3539.856, + "args": { + "External id": 299306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "3.6769426533212504e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070554, "tid": 2070554, + "ts": 5333369000235.772, "dur": 348.738, + "args": { + "External id": 299307,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "3.6769426533212504e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9719 + } + }, + { + "name": "process_name", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 0, + "args": { + "sort_index": 2070554 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 2107619, + "args": { + "name": "thread 2107619 (pt_autograd_7)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 2107619, + "args": { + "sort_index": 2107619 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 2107619, + "args": { + "name": "thread 2107619 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 2107619, + "args": { + "sort_index": 2107619 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 2070554, + "args": { + "name": "thread 2070554 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952314.889, "pid": 2070554, "tid": 2070554, + "args": { + "sort_index": 2070554 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 5333366952248.661, "dur": 2050413.353, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5333366952248.661, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 5333366952248.661 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 5333369050197.844 + } + ], + "traceName": "exp/top.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace/iteration_14848/rank7_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file